From 11c624e488663f4f7554d1f92a072c7caee3908e Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Mon, 21 Jan 2019 16:59:10 +0100 Subject: Refactor stdsimd This commit: * renames `coresimd` to `core_arch` and `stdsimd` to `std_detect` * `std_detect` does no longer depend on `core_arch` - it is a freestanding `no_std` library that only depends on `core` - it is renamed to `std_detect` * moves the top-level coresimd and stdsimd directories into the appropriate crates/... directories - this simplifies creating crate.io releases of these crates * moves the top-level `coresimd` and `stdsimd` sub-directories into their corresponding crates in `crates/{core_arch, std_detect}`. --- .../crates/std_detect/src/detect/arch/aarch64.rs | 103 ++++++ .../crates/std_detect/src/detect/arch/arm.rs | 36 +++ .../crates/std_detect/src/detect/arch/mips.rs | 26 ++ .../crates/std_detect/src/detect/arch/mips64.rs | 26 ++ .../crates/std_detect/src/detect/arch/powerpc.rs | 39 +++ .../crates/std_detect/src/detect/arch/powerpc64.rs | 39 +++ .../crates/std_detect/src/detect/arch/x86.rs | 331 +++++++++++++++++++ .../stdarch/crates/std_detect/src/detect/bit.rs | 9 + .../stdarch/crates/std_detect/src/detect/cache.rs | 162 ++++++++++ .../crates/std_detect/src/detect/error_macros.rs | 150 +++++++++ .../stdarch/crates/std_detect/src/detect/mod.rs | 85 +++++ .../crates/std_detect/src/detect/os/aarch64.rs | 79 +++++ .../std_detect/src/detect/os/freebsd/aarch64.rs | 28 ++ .../crates/std_detect/src/detect/os/freebsd/mod.rs | 14 + .../std_detect/src/detect/os/linux/aarch64.rs | 157 +++++++++ .../crates/std_detect/src/detect/os/linux/arm.rs | 49 +++ .../std_detect/src/detect/os/linux/auxvec.rs | 270 ++++++++++++++++ .../std_detect/src/detect/os/linux/cpuinfo.rs | 301 +++++++++++++++++ .../crates/std_detect/src/detect/os/linux/mips.rs | 31 ++ .../crates/std_detect/src/detect/os/linux/mod.rs | 26 ++ .../std_detect/src/detect/os/linux/powerpc.rs | 41 +++ .../crates/std_detect/src/detect/os/other.rs | 9 + .../stdarch/crates/std_detect/src/detect/os/x86.rs | 357 +++++++++++++++++++++ .../src/detect/test_data/linux-rpi3.auxv | Bin 0 -> 160 bytes .../src/detect/test_data/linux-x64-i7-6850k.auxv | Bin 0 -> 304 bytes .../macos-virtualbox-linux-x86-4850HQ.auxv | Bin 0 -> 160 bytes library/stdarch/crates/std_detect/src/lib.rs | 37 +++ library/stdarch/crates/std_detect/src/mod.rs | 5 + 28 files changed, 2410 insertions(+) create mode 100644 library/stdarch/crates/std_detect/src/detect/arch/aarch64.rs create mode 100644 library/stdarch/crates/std_detect/src/detect/arch/arm.rs create mode 100644 library/stdarch/crates/std_detect/src/detect/arch/mips.rs create mode 100644 library/stdarch/crates/std_detect/src/detect/arch/mips64.rs create mode 100644 library/stdarch/crates/std_detect/src/detect/arch/powerpc.rs create mode 100644 library/stdarch/crates/std_detect/src/detect/arch/powerpc64.rs create mode 100644 library/stdarch/crates/std_detect/src/detect/arch/x86.rs create mode 100644 library/stdarch/crates/std_detect/src/detect/bit.rs create mode 100644 library/stdarch/crates/std_detect/src/detect/cache.rs create mode 100644 library/stdarch/crates/std_detect/src/detect/error_macros.rs create mode 100644 library/stdarch/crates/std_detect/src/detect/mod.rs create mode 100644 library/stdarch/crates/std_detect/src/detect/os/aarch64.rs create mode 100644 library/stdarch/crates/std_detect/src/detect/os/freebsd/aarch64.rs create mode 100644 library/stdarch/crates/std_detect/src/detect/os/freebsd/mod.rs create mode 100644 
library/stdarch/crates/std_detect/src/detect/os/linux/aarch64.rs create mode 100644 library/stdarch/crates/std_detect/src/detect/os/linux/arm.rs create mode 100644 library/stdarch/crates/std_detect/src/detect/os/linux/auxvec.rs create mode 100644 library/stdarch/crates/std_detect/src/detect/os/linux/cpuinfo.rs create mode 100644 library/stdarch/crates/std_detect/src/detect/os/linux/mips.rs create mode 100644 library/stdarch/crates/std_detect/src/detect/os/linux/mod.rs create mode 100644 library/stdarch/crates/std_detect/src/detect/os/linux/powerpc.rs create mode 100644 library/stdarch/crates/std_detect/src/detect/os/other.rs create mode 100644 library/stdarch/crates/std_detect/src/detect/os/x86.rs create mode 100644 library/stdarch/crates/std_detect/src/detect/test_data/linux-rpi3.auxv create mode 100644 library/stdarch/crates/std_detect/src/detect/test_data/linux-x64-i7-6850k.auxv create mode 100644 library/stdarch/crates/std_detect/src/detect/test_data/macos-virtualbox-linux-x86-4850HQ.auxv create mode 100644 library/stdarch/crates/std_detect/src/lib.rs create mode 100644 library/stdarch/crates/std_detect/src/mod.rs (limited to 'library/stdarch/crates/std_detect/src') diff --git a/library/stdarch/crates/std_detect/src/detect/arch/aarch64.rs b/library/stdarch/crates/std_detect/src/detect/arch/aarch64.rs new file mode 100644 index 00000000000..882c22cc174 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/arch/aarch64.rs @@ -0,0 +1,103 @@ +//! Aarch64 run-time features. + +/// Checks if `aarch64` feature is enabled. +#[macro_export] +#[unstable(feature = "stdsimd", issue = "27731")] +#[allow_internal_unstable] +macro_rules! is_aarch64_feature_detected { + ("neon") => { + // FIXME: this should be removed once we rename Aarch64 neon to asimd + cfg!(target_feature = "neon") || + $crate::detect::check_for($crate::detect::Feature::asimd) + }; + ("asimd") => { + cfg!(target_feature = "neon") || + $crate::detect::check_for($crate::detect::Feature::asimd) + }; + ("pmull") => { + cfg!(target_feature = "pmull") || + $crate::detect::check_for($crate::detect::Feature::pmull) + }; + ("fp") => { + cfg!(target_feature = "fp") || + $crate::detect::check_for($crate::detect::Feature::fp) + }; + ("fp16") => { + cfg!(target_feature = "fp16") || + $crate::detect::check_for($crate::detect::Feature::fp16) + }; + ("sve") => { + cfg!(target_feature = "sve") || + $crate::detect::check_for($crate::detect::Feature::sve) + }; + ("crc") => { + cfg!(target_feature = "crc") || + $crate::detect::check_for($crate::detect::Feature::crc) + }; + ("crypto") => { + cfg!(target_feature = "crypto") || + $crate::detect::check_for($crate::detect::Feature::crypto) + }; + ("lse") => { + cfg!(target_feature = "lse") || + $crate::detect::check_for($crate::detect::Feature::lse) + }; + ("rdm") => { + cfg!(target_feature = "rdm") || + $crate::detect::check_for($crate::detect::Feature::rdm) + }; + ("rcpc") => { + cfg!(target_feature = "rcpc") || + $crate::detect::check_for($crate::detect::Feature::rcpc) + }; + ("dotprod") => { + cfg!(target_feature = "dotprod") || + $crate::detect::check_for($crate::detect::Feature::dotprod) + }; + ("ras") => { + compile_error!("\"ras\" feature cannot be detected at run-time") + }; + ("v8.1a") => { + compile_error!("\"v8.1a\" feature cannot be detected at run-time") + }; + ("v8.2a") => { + compile_error!("\"v8.2a\" feature cannot be detected at run-time") + }; + ("v8.3a") => { + compile_error!("\"v8.3a\" feature cannot be detected at run-time") + }; + ($t:tt) => { 
compile_error!(concat!("unknown aarch64 target feature: ", $t)) }; +} + +/// ARM Aarch64 CPU Feature enum. Each variant denotes a position in a bitset +/// for a particular feature. +/// +/// PLEASE: do not use this, it is an implementation detail subject to change. +#[doc(hidden)] +#[allow(non_camel_case_types)] +#[repr(u8)] +#[unstable(feature = "stdsimd_internal", issue = "0")] +pub enum Feature { + /// ARM Advanced SIMD (ASIMD) + asimd, + /// Polynomial Multiply + pmull, + /// Floating point support + fp, + /// Half-float support. + fp16, + /// Scalable Vector Extension (SVE) + sve, + /// CRC32 (Cyclic Redundancy Check) + crc, + /// Crypto: AES + PMULL + SHA1 + SHA2 + crypto, + /// Atomics (Large System Extension) + lse, + /// Rounding Double Multiply (ASIMDRDM) + rdm, + /// Release consistent Processor consistent (RcPc) + rcpc, + /// Vector Dot-Product (ASIMDDP) + dotprod, +} diff --git a/library/stdarch/crates/std_detect/src/detect/arch/arm.rs b/library/stdarch/crates/std_detect/src/detect/arch/arm.rs new file mode 100644 index 00000000000..cb6ac6badcc --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/arch/arm.rs @@ -0,0 +1,36 @@ +//! Run-time feature detection on ARM Aarch32. + +/// Checks if `arm` feature is enabled. +#[macro_export] +#[unstable(feature = "stdsimd", issue = "27731")] +#[allow_internal_unstable] +macro_rules! is_arm_feature_detected { + ("neon") => { + cfg!(target_feature = "neon") || + $crate::detect::check_for($crate::detect::Feature::neon) + }; + ("pmull") => { + cfg!(target_feature = "pmull") || + $crate::detect::check_for($crate::detect::Feature::pmull) + }; + ("v7") => { compile_error!("\"v7\" feature cannot be detected at run-time") }; + ("vfp2") => { compile_error!("\"vfp2\" feature cannot be detected at run-time") }; + ("vfp3") => { compile_error!("\"vfp3\" feature cannot be detected at run-time") }; + ("vfp4") => { compile_error!("\"vfp4\" feature cannot be detected at run-time") }; + ($t:tt) => { compile_error!(concat!("unknown arm target feature: ", $t)) }; +} + +/// ARM CPU Feature enum. Each variant denotes a position in a bitset for a +/// particular feature. +/// +/// PLEASE: do not use this, it is an implementation detail subject to change. +#[doc(hidden)] +#[allow(non_camel_case_types)] +#[repr(u8)] +#[unstable(feature = "stdsimd_internal", issue = "0")] +pub enum Feature { + /// ARM Advanced SIMD (NEON) - Aarch32 + neon, + /// Polynomial Multiply + pmull, +} diff --git a/library/stdarch/crates/std_detect/src/detect/arch/mips.rs b/library/stdarch/crates/std_detect/src/detect/arch/mips.rs new file mode 100644 index 00000000000..876f8dde262 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/arch/mips.rs @@ -0,0 +1,26 @@ +//! Run-time feature detection on MIPS. + +/// Checks if `mips` feature is enabled. +#[macro_export] +#[unstable(feature = "stdsimd", issue = "27731")] +#[allow_internal_unstable] +macro_rules! is_mips_feature_detected { + ("msa") => { + cfg!(target_feature = "msa") || + $crate::detect::check_for($crate::detect::Feature::msa) + }; + ($t:tt) => { compile_error!(concat!("unknown mips target feature: ", $t)) }; +} + +/// MIPS CPU Feature enum. Each variant denotes a position in a bitset for a +/// particular feature. +/// +/// PLEASE: do not use this, it is an implementation detail subject to change. 
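(Editorial aside, not part of this patch.) The per-architecture macros above are meant for run-time dispatch. A minimal sketch for the MIPS case, where `msa_impl` and `scalar_impl` are hypothetical functions standing in for an MSA-accelerated routine and its portable fallback:

```rust
// Hypothetical dispatch using the macro defined above (sketch only).
#[cfg(target_arch = "mips")]
fn sum(data: &[i32]) -> i32 {
    if is_mips_feature_detected!("msa") {
        // `msa_impl` would be built with #[target_feature(enable = "msa")],
        // so calling it requires `unsafe`.
        unsafe { msa_impl(data) }
    } else {
        scalar_impl(data)
    }
}
```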
+#[doc(hidden)] +#[allow(non_camel_case_types)] +#[repr(u8)] +#[unstable(feature = "stdsimd_internal", issue = "0")] +pub enum Feature { + /// MIPS SIMD Architecture (MSA) + msa, +} diff --git a/library/stdarch/crates/std_detect/src/detect/arch/mips64.rs b/library/stdarch/crates/std_detect/src/detect/arch/mips64.rs new file mode 100644 index 00000000000..ab837b3d5c9 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/arch/mips64.rs @@ -0,0 +1,26 @@ +//! Run-time feature detection on MIPS64. + +/// Checks if `mips64` feature is enabled. +#[macro_export] +#[unstable(feature = "stdsimd", issue = "27731")] +#[allow_internal_unstable] +macro_rules! is_mips64_feature_detected { + ("msa") => { + cfg!(target_feature = "msa") || + $crate::detect::check_for($crate::detect::Feature::msa) + }; + ($t:tt) => { compile_error!(concat!("unknown mips64 target feature: ", $t)) }; +} + +/// MIPS64 CPU Feature enum. Each variant denotes a position in a bitset +/// for a particular feature. +/// +/// PLEASE: do not use this, it is an implementation detail subject to change. +#[doc(hidden)] +#[allow(non_camel_case_types)] +#[repr(u8)] +#[unstable(feature = "stdsimd_internal", issue = "0")] +pub enum Feature { + /// MIPS SIMD Architecture (MSA) + msa, +} diff --git a/library/stdarch/crates/std_detect/src/detect/arch/powerpc.rs b/library/stdarch/crates/std_detect/src/detect/arch/powerpc.rs new file mode 100644 index 00000000000..9c440b1d6b0 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/arch/powerpc.rs @@ -0,0 +1,39 @@ +//! Run-time feature detection on PowerPC. + +/// Checks if `powerpc` feature is enabled. +#[macro_export] +#[unstable(feature = "stdsimd", issue = "27731")] +#[allow_internal_unstable] +macro_rules! is_powerpc_feature_detected { + ("altivec") => { + cfg!(target_feature = "altivec") || + $crate::detect::check_for($crate::detect::Feature::altivec) + }; + ("vsx") => { + cfg!(target_feature = "vsx") || + $crate::detect::check_for($crate::detect::Feature::vsx) + }; + ("power8") => { + cfg!(target_feature = "power8") || + $crate::detect::check_for($crate::detect::Feature::power8) + }; + ($t:tt) => { compile_error!(concat!("unknown powerpc target feature: ", $t)) }; +} + + +/// PowerPC CPU Feature enum. Each variant denotes a position in a bitset +/// for a particular feature. +/// +/// PLEASE: do not use this, it is an implementation detail subject to change. +#[doc(hidden)] +#[allow(non_camel_case_types)] +#[repr(u8)] +#[unstable(feature = "stdsimd_internal", issue = "0")] +pub enum Feature { + /// Altivec + altivec, + /// VSX + vsx, + /// Power8 + power8, +} diff --git a/library/stdarch/crates/std_detect/src/detect/arch/powerpc64.rs b/library/stdarch/crates/std_detect/src/detect/arch/powerpc64.rs new file mode 100644 index 00000000000..910940f0bb9 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/arch/powerpc64.rs @@ -0,0 +1,39 @@ +//! Run-time feature detection on PowerPC64. + +/// Checks if `powerpc64` feature is enabled. +#[macro_export] +#[unstable(feature = "stdsimd", issue = "27731")] +#[allow_internal_unstable] +macro_rules! 
is_powerpc64_feature_detected { + ("altivec") => { + cfg!(target_feature = "altivec") || + $crate::detect::check_for($crate::detect::Feature::altivec) + }; + ("vsx") => { + cfg!(target_feature = "vsx") || + $crate::detect::check_for($crate::detect::Feature::vsx) + }; + ("power8") => { + cfg!(target_feature = "power8") || + $crate::detect::check_for($crate::detect::Feature::power8) + }; + ($t:tt) => { compile_error!(concat!("unknown powerpc64 target feature: ", $t)) }; +} + + +/// PowerPC64 CPU Feature enum. Each variant denotes a position in a bitset +/// for a particular feature. +/// +/// PLEASE: do not use this, it is an implementation detail subject to change. +#[doc(hidden)] +#[allow(non_camel_case_types)] +#[repr(u8)] +#[unstable(feature = "stdsimd_internal", issue = "0")] +pub enum Feature { + /// Altivec + altivec, + /// VSX + vsx, + /// Power8 + power8, +} diff --git a/library/stdarch/crates/std_detect/src/detect/arch/x86.rs b/library/stdarch/crates/std_detect/src/detect/arch/x86.rs new file mode 100644 index 00000000000..3ef8d31d12b --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/arch/x86.rs @@ -0,0 +1,331 @@ +//! This module implements minimal run-time feature detection for x86. +//! +//! The features are detected using the `detect_features` function below. +//! This function uses the CPUID instruction to read the feature flags from the +//! CPU and encodes them in an `usize` where each bit position represents +//! whether a feature is available (bit is set) or unavaiable (bit is cleared). +//! +//! The enum `Feature` is used to map bit positions to feature names, and the +//! the `__crate::detect::check_for!` macro is used to map string literals (e.g. +//! "avx") to these bit positions (e.g. `Feature::avx`). +//! +//! +//! The run-time feature detection is performed by the +//! `__crate::detect::check_for(Feature) -> bool` function. On its first call, +//! this functions queries the CPU for the available features and stores them +//! in a global `AtomicUsize` variable. The query is performed by just checking +//! whether the feature bit in this global variable is set or cleared. + +/// A macro to test at *runtime* whether a CPU feature is available on +/// x86/x86-64 platforms. +/// +/// This macro is provided in the standard library and will detect at runtime +/// whether the specified CPU feature is detected. This does *not* resolve at +/// compile time unless the specified feature is already enabled for the entire +/// crate. Runtime detection currently relies mostly on the `cpuid` instruction. +/// +/// This macro only takes one argument which is a string literal of the feature +/// being tested for. The feature names supported are the lowercase versions of +/// the ones defined by Intel in [their documentation][docs]. +/// +/// ## Supported arguments +/// +/// This macro supports the same names that `#[target_feature]` supports. Unlike +/// `#[target_feature]`, however, this macro does not support names separated +/// with a comma. Instead testing for multiple features must be done through +/// separate macro invocations for now. 
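(Editorial aside, not part of this patch.) "Separate macro invocations" looks like the following sketch; `add_avx2_fma` and `add_fallback` are hypothetical, with the former assumed to be compiled with `#[target_feature(enable = "avx2,fma")]`:

```rust
// Testing two features requires two invocations of the macro (sketch only).
fn add(a: &[f32], b: &[f32], out: &mut [f32]) {
    if is_x86_feature_detected!("avx2") && is_x86_feature_detected!("fma") {
        // Calling a #[target_feature] function is unsafe.
        unsafe { add_avx2_fma(a, b, out) }
    } else {
        add_fallback(a, b, out)
    }
}
```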
+/// +/// Supported arguments are: +/// +/// * `"aes"` +/// * `"pclmulqdq"` +/// * `"rdrand"` +/// * `"rdseed"` +/// * `"tsc"` +/// * `"mmx"` +/// * `"sse"` +/// * `"sse2"` +/// * `"sse3"` +/// * `"ssse3"` +/// * `"sse4.1"` +/// * `"sse4.2"` +/// * `"sse4a"` +/// * `"sha"` +/// * `"avx"` +/// * `"avx2"` +/// * `"avx512f"` +/// * `"avx512cd"` +/// * `"avx512er"` +/// * `"avx512pf"` +/// * `"avx512bw"` +/// * `"avx512dq"` +/// * `"avx512vl"` +/// * `"avx512ifma"` +/// * `"avx512vbmi"` +/// * `"avx512vpopcntdq"` +/// * `"fma"` +/// * `"bmi1"` +/// * `"bmi2"` +/// * `"abm"` +/// * `"lzcnt"` +/// * `"tbm"` +/// * `"popcnt"` +/// * `"fxsr"` +/// * `"xsave"` +/// * `"xsaveopt"` +/// * `"xsaves"` +/// * `"xsavec"` +/// +/// [docs]: https://software.intel.com/sites/landingpage/IntrinsicsGuide +#[macro_export] +#[stable(feature = "simd_x86", since = "1.27.0")] +#[allow_internal_unstable] +macro_rules! is_x86_feature_detected { + ("aes") => { + cfg!(target_feature = "aes") || $crate::detect::check_for( + $crate::detect::Feature::aes) }; + ("pclmulqdq") => { + cfg!(target_feature = "pclmulqdq") || $crate::detect::check_for( + $crate::detect::Feature::pclmulqdq) }; + ("rdrand") => { + cfg!(target_feature = "rdrand") || $crate::detect::check_for( + $crate::detect::Feature::rdrand) }; + ("rdseed") => { + cfg!(target_feature = "rdseed") || $crate::detect::check_for( + $crate::detect::Feature::rdseed) }; + ("tsc") => { + cfg!(target_feature = "tsc") || $crate::detect::check_for( + $crate::detect::Feature::tsc) }; + ("mmx") => { + cfg!(target_feature = "mmx") || $crate::detect::check_for( + $crate::detect::Feature::mmx) }; + ("sse") => { + cfg!(target_feature = "sse") || $crate::detect::check_for( + $crate::detect::Feature::sse) }; + ("sse2") => { + cfg!(target_feature = "sse2") || $crate::detect::check_for( + $crate::detect::Feature::sse2) + }; + ("sse3") => { + cfg!(target_feature = "sse3") || $crate::detect::check_for( + $crate::detect::Feature::sse3) + }; + ("ssse3") => { + cfg!(target_feature = "ssse3") || $crate::detect::check_for( + $crate::detect::Feature::ssse3) + }; + ("sse4.1") => { + cfg!(target_feature = "sse4.1") || $crate::detect::check_for( + $crate::detect::Feature::sse4_1) + }; + ("sse4.2") => { + cfg!(target_feature = "sse4.2") || $crate::detect::check_for( + $crate::detect::Feature::sse4_2) + }; + ("sse4a") => { + cfg!(target_feature = "sse4a") || $crate::detect::check_for( + $crate::detect::Feature::sse4a) + }; + ("sha") => { + cfg!(target_feature = "sha") || $crate::detect::check_for( + $crate::detect::Feature::sha) + }; + ("avx") => { + cfg!(target_feature = "avx") || $crate::detect::check_for( + $crate::detect::Feature::avx) + }; + ("avx2") => { + cfg!(target_feature = "avx2") || $crate::detect::check_for( + $crate::detect::Feature::avx2) + }; + ("avx512f") => { + cfg!(target_feature = "avx512f") || $crate::detect::check_for( + $crate::detect::Feature::avx512f) + }; + ("avx512cd") => { + cfg!(target_feature = "avx512cd") || $crate::detect::check_for( + $crate::detect::Feature::avx512cd) + }; + ("avx512er") => { + cfg!(target_feature = "avx512er") || $crate::detect::check_for( + $crate::detect::Feature::avx512er) + }; + ("avx512pf") => { + cfg!(target_feature = "avx512pf") || $crate::detect::check_for( + $crate::detect::Feature::avx512pf) + }; + ("avx512bw") => { + cfg!(target_feature = "avx512bw") || $crate::detect::check_for( + $crate::detect::Feature::avx512bw) + }; + ("avx512dq") => { + cfg!(target_feature = "avx512dq") || $crate::detect::check_for( + 
$crate::detect::Feature::avx512dq) + }; + ("avx512vl") => { + cfg!(target_Feature = "avx512vl") || $crate::detect::check_for( + $crate::detect::Feature::avx512vl) + }; + ("avx512ifma") => { + cfg!(target_feature = "avx512ifma") || $crate::detect::check_for( + $crate::detect::Feature::avx512_ifma) + }; + ("avx512vbmi") => { + cfg!(target_feature = "avx512vbmi") || $crate::detect::check_for( + $crate::detect::Feature::avx512_vbmi) + }; + ("avx512vpopcntdq") => { + cfg!(target_feature = "avx512vpopcntdq") || $crate::detect::check_for( + $crate::detect::Feature::avx512_vpopcntdq) + }; + ("fma") => { + cfg!(target_feature = "fma") || $crate::detect::check_for( + $crate::detect::Feature::fma) + }; + ("bmi1") => { + cfg!(target_feature = "bmi1") || $crate::detect::check_for( + $crate::detect::Feature::bmi) + }; + ("bmi2") => { + cfg!(target_feature = "bmi2") || $crate::detect::check_for( + $crate::detect::Feature::bmi2) + }; + ("abm") => { + cfg!(target_feature = "abm") || $crate::detect::check_for( + $crate::detect::Feature::abm) + }; + ("lzcnt") => { + cfg!(target_feature = "lzcnt") || $crate::detect::check_for( + $crate::detect::Feature::abm) + }; + ("tbm") => { + cfg!(target_feature = "tbm") || $crate::detect::check_for( + $crate::detect::Feature::tbm) + }; + ("popcnt") => { + cfg!(target_feature = "popcnt") || $crate::detect::check_for( + $crate::detect::Feature::popcnt) + }; + ("fxsr") => { + cfg!(target_feature = "fxsr") || $crate::detect::check_for( + $crate::detect::Feature::fxsr) + }; + ("xsave") => { + cfg!(target_feature = "xsave") || $crate::detect::check_for( + $crate::detect::Feature::xsave) + }; + ("xsaveopt") => { + cfg!(target_feature = "xsaveopt") || $crate::detect::check_for( + $crate::detect::Feature::xsaveopt) + }; + ("xsaves") => { + cfg!(target_feature = "xsaves") || $crate::detect::check_for( + $crate::detect::Feature::xsaves) + }; + ("xsavec") => { + cfg!(target_feature = "xsavec") || $crate::detect::check_for( + $crate::detect::Feature::xsavec) + }; + ("cmpxchg16b") => { + cfg!(target_feature = "cmpxchg16b") || $crate::detect::check_for( + $crate::detect::Feature::cmpxchg16b) + }; + ("adx") => { + cfg!(target_feature = "adx") || $crate::detect::check_for( + $crate::detect::Feature::adx) + }; + ($t:tt) => { + compile_error!(concat!("unknown target feature: ", $t)) + }; +} + +/// X86 CPU Feature enum. Each variant denotes a position in a bitset for a +/// particular feature. +/// +/// This is an unstable implementation detail subject to change. 
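(Editorial aside, not part of this patch.) Each arm of the macro above reduces to a compile-time `cfg!` check or a run-time lookup of a single bit, with the `Feature` discriminant below used as the bit index; conceptually:

```rust
// Conceptual sketch: `cached_bits` stands in for the global detection cache.
fn feature_is_set(cached_bits: u64, feature: Feature) -> bool {
    cached_bits & (1u64 << (feature as u32)) != 0
}
```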
+#[allow(non_camel_case_types)] +#[repr(u8)] +#[doc(hidden)] +#[unstable(feature = "stdsimd_internal", issue = "0")] +pub enum Feature { + /// AES (Advanced Encryption Standard New Instructions AES-NI) + aes, + /// CLMUL (Carry-less Multiplication) + pclmulqdq, + /// RDRAND + rdrand, + /// RDSEED + rdseed, + /// TSC (Time Stamp Counter) + tsc, + /// MMX + mmx, + /// SSE (Streaming SIMD Extensions) + sse, + /// SSE2 (Streaming SIMD Extensions 2) + sse2, + /// SSE3 (Streaming SIMD Extensions 3) + sse3, + /// SSSE3 (Supplemental Streaming SIMD Extensions 3) + ssse3, + /// SSE4.1 (Streaming SIMD Extensions 4.1) + sse4_1, + /// SSE4.2 (Streaming SIMD Extensions 4.2) + sse4_2, + /// SSE4a (Streaming SIMD Extensions 4a) + sse4a, + /// SHA + sha, + /// AVX (Advanced Vector Extensions) + avx, + /// AVX2 (Advanced Vector Extensions 2) + avx2, + /// AVX-512 F (Foundation) + avx512f, + /// AVX-512 CD (Conflict Detection Instructions) + avx512cd, + /// AVX-512 ER (Exponential and Reciprocal Instructions) + avx512er, + /// AVX-512 PF (Prefetch Instructions) + avx512pf, + /// AVX-512 BW (Byte and Word Instructions) + avx512bw, + /// AVX-512 DQ (Doubleword and Quadword) + avx512dq, + /// AVX-512 VL (Vector Length Extensions) + avx512vl, + /// AVX-512 IFMA (Integer Fused Multiply Add) + avx512_ifma, + /// AVX-512 VBMI (Vector Byte Manipulation Instructions) + avx512_vbmi, + /// AVX-512 VPOPCNTDQ (Vector Population Count Doubleword and + /// Quadword) + avx512_vpopcntdq, + /// FMA (Fused Multiply Add) + fma, + /// BMI1 (Bit Manipulation Instructions 1) + bmi, + /// BMI1 (Bit Manipulation Instructions 2) + bmi2, + /// ABM (Advanced Bit Manipulation) on AMD / LZCNT (Leading Zero + /// Count) on Intel + abm, + /// TBM (Trailing Bit Manipulation) + tbm, + /// POPCNT (Population Count) + popcnt, + /// FXSR (Floating-point context fast save and restor) + fxsr, + /// XSAVE (Save Processor Extended States) + xsave, + /// XSAVEOPT (Save Processor Extended States Optimized) + xsaveopt, + /// XSAVES (Save Processor Extended States Supervisor) + xsaves, + /// XSAVEC (Save Processor Extended States Compacted) + xsavec, + /// CMPXCH16B, a 16-byte compare-and-swap instruction + cmpxchg16b, + /// ADX, Intel ADX (Multi-Precision Add-Carry Instruction Extensions) + adx, +} diff --git a/library/stdarch/crates/std_detect/src/detect/bit.rs b/library/stdarch/crates/std_detect/src/detect/bit.rs new file mode 100644 index 00000000000..578f0b16b74 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/bit.rs @@ -0,0 +1,9 @@ +//! Bit manipulation utilities. + +/// Tests the `bit` of `x`. +#[allow(dead_code)] +#[inline] +pub(crate) fn test(x: usize, bit: u32) -> bool { + debug_assert!(bit < 32, "bit index out-of-bounds"); + x & (1 << bit) != 0 +} diff --git a/library/stdarch/crates/std_detect/src/detect/cache.rs b/library/stdarch/crates/std_detect/src/detect/cache.rs new file mode 100644 index 00000000000..c2de4da7349 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/cache.rs @@ -0,0 +1,162 @@ +//! Caches run-time feature detection so that it only needs to be computed +//! once. + +#![allow(dead_code)] // not used on all platforms + +use core::sync::atomic::Ordering; + +#[cfg(target_pointer_width = "64")] +use core::sync::atomic::AtomicU64; + +#[cfg(target_pointer_width = "32")] +use core::sync::atomic::AtomicU32; + +/// Sets the `bit` of `x`. +#[inline] +const fn set_bit(x: u64, bit: u32) -> u64 { + x | 1 << bit +} + +/// Tests the `bit` of `x`. 
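(Editorial aside, not part of this patch.) The caching scheme implemented below boils down to "initialize once, then test bits", with the all-ones value doubling as the "not yet initialized" sentinel; that sentinel is also why only `CACHE_CAPACITY = 63` feature bits are usable. A stand-alone sketch, where `DETECTED` and `test_cached` are hypothetical stand-ins for the real `CACHE` and `cache::test`:

```rust
use core::sync::atomic::{AtomicU64, Ordering};

// All bits set means "not initialized yet" (sketch of the real CACHE below).
static DETECTED: AtomicU64 = AtomicU64::new(u64::max_value());

fn test_cached(bit: u32, detect: impl FnOnce() -> u64) -> bool {
    if DETECTED.load(Ordering::Relaxed) == u64::max_value() {
        // First call: run the detector and publish its bitset.
        DETECTED.store(detect(), Ordering::Relaxed);
    }
    DETECTED.load(Ordering::Relaxed) & (1 << bit) != 0
}
```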
+#[inline] +const fn test_bit(x: u64, bit: u32) -> bool { + x & (1 << bit) != 0 +} + +/// Maximum number of features that can be cached. +const CACHE_CAPACITY: u32 = 63; + +/// This type is used to initialize the cache +#[derive(Copy, Clone)] +pub(crate) struct Initializer(u64); + +impl Default for Initializer { + fn default() -> Self { + Initializer(0) + } +} + +impl Initializer { + /// Tests the `bit` of the cache. + #[allow(dead_code)] + #[inline] + pub(crate) fn test(self, bit: u32) -> bool { + // FIXME: this way of making sure that the cache is large enough is + // brittle. + debug_assert!( + bit < CACHE_CAPACITY, + "too many features, time to increase the cache size!" + ); + test_bit(self.0, bit) + } + + /// Sets the `bit` of the cache. + #[inline] + pub(crate) fn set(&mut self, bit: u32) { + // FIXME: this way of making sure that the cache is large enough is + // brittle. + debug_assert!( + bit < CACHE_CAPACITY, + "too many features, time to increase the cache size!" + ); + let v = self.0; + self.0 = set_bit(v, bit); + } +} + +/// This global variable is a cache of the features supported by the CPU. +static CACHE: Cache = Cache::uninitialized(); + +/// Feature cache with capacity for `CACHE_CAPACITY` features. +/// +/// Note: the last feature bit is used to represent an +/// uninitialized cache. +#[cfg(target_pointer_width = "64")] +struct Cache(AtomicU64); + +#[cfg(target_pointer_width = "64")] +impl Cache { + /// Creates an uninitialized cache. + const fn uninitialized() -> Self { + const X: AtomicU64 = AtomicU64::new(u64::max_value()); + Self(X) + } + /// Is the cache uninitialized? + #[inline] + pub(crate) fn is_uninitialized(&self) -> bool { + self.0.load(Ordering::Relaxed) == u64::max_value() + } + + /// Is the `bit` in the cache set? + #[inline] + pub(crate) fn test(&self, bit: u32) -> bool { + test_bit(CACHE.0.load(Ordering::Relaxed), bit) + } + + /// Initializes the cache. + #[inline] + pub(crate) fn initialize(&self, value: Initializer) { + self.0.store(value.0, Ordering::Relaxed); + } +} + +/// Feature cache with capacity for `CACHE_CAPACITY` features. +/// +/// Note: the last feature bit is used to represent an +/// uninitialized cache. +#[cfg(target_pointer_width = "32")] +struct Cache(AtomicU32, AtomicU32); + +#[cfg(target_pointer_width = "32")] +impl Cache { + /// Creates an uninitialized cache. + const fn uninitialized() -> Self { + Cache( + AtomicU32::new(u32::max_value()), + AtomicU32::new(u32::max_value()), + ) + } + /// Is the cache uninitialized? + #[inline] + pub(crate) fn is_uninitialized(&self) -> bool { + self.1.load(Ordering::Relaxed) == u32::max_value() + } + + /// Is the `bit` in the cache set? + #[inline] + pub(crate) fn test(&self, bit: u32) -> bool { + if bit < 32 { + test_bit(CACHE.0.load(Ordering::Relaxed) as u64, bit) + } else { + test_bit(CACHE.1.load(Ordering::Relaxed) as u64, bit - 32) + } + } + + /// Initializes the cache. + #[inline] + pub(crate) fn initialize(&self, value: Initializer) { + let lo: u32 = value.0 as u32; + let hi: u32 = (value.0 >> 32) as u32; + self.0.store(lo, Ordering::Relaxed); + self.1.store(hi, Ordering::Relaxed); + } +} + +/// Test the `bit` of the storage. If the storage has not been initialized, +/// initializes it with the result of `f()`. +/// +/// On its first invocation, it detects the CPU features and caches them in the +/// `CACHE` global variable as an `AtomicU64`. +/// +/// It uses the `Feature` variant to index into this variable as a bitset. 
If +/// the bit is set, the feature is enabled, and otherwise it is disabled. +#[inline] +pub(crate) fn test(bit: u32, f: F) -> bool +where + F: FnOnce() -> Initializer, +{ + if CACHE.is_uninitialized() { + CACHE.initialize(f()); + } + CACHE.test(bit) +} diff --git a/library/stdarch/crates/std_detect/src/detect/error_macros.rs b/library/stdarch/crates/std_detect/src/detect/error_macros.rs new file mode 100644 index 00000000000..6769757ed93 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/error_macros.rs @@ -0,0 +1,150 @@ +//! The `is_{target_arch}_feature_detected!` macro are only available on their +//! architecture. These macros provide a better error messages when the user +//! attempts to call them in a different architecture. + +/// Prevents compilation if `is_x86_feature_detected` is used somewhere +/// else than `x86` and `x86_64` targets. +#[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))] +#[macro_export] +#[unstable(feature = "stdsimd", issue = "27731")] +macro_rules! is_x86_feature_detected { + ($t: tt) => { + compile_error!( + r#" + is_x86_feature_detected can only be used on x86 and x86_64 targets. + You can prevent it from being used in other architectures by + guarding it behind a cfg(target_arch) as follows: + + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] { + if is_x86_feature_detected(...) { ... } + } + "# + ) + }; +} + +/// Prevents compilation if `is_arm_feature_detected` is used somewhere else +/// than `ARM` targets. +#[cfg(not(target_arch = "arm"))] +#[macro_export] +#[unstable(feature = "stdsimd", issue = "27731")] +macro_rules! is_arm_feature_detected { + ($t:tt) => { + compile_error!( + r#" + is_arm_feature_detected can only be used on ARM targets. + You can prevent it from being used in other architectures by + guarding it behind a cfg(target_arch) as follows: + + #[cfg(target_arch = "arm")] { + if is_arm_feature_detected(...) { ... } + } + "# + ) + }; +} + +/// Prevents compilation if `is_aarch64_feature_detected` is used somewhere else +/// than `aarch64` targets. +#[cfg(not(target_arch = "aarch64"))] +#[macro_export] +#[unstable(feature = "stdsimd", issue = "27731")] +macro_rules! is_aarch64_feature_detected { + ($t: tt) => { + compile_error!( + r#" + is_aarch64_feature_detected can only be used on AArch64 targets. + You can prevent it from being used in other architectures by + guarding it behind a cfg(target_arch) as follows: + + #[cfg(target_arch = "aarch64")] { + if is_aarch64_feature_detected(...) { ... } + } + "# + ) + }; +} + +/// Prevents compilation if `is_powerpc_feature_detected` is used somewhere else +/// than `PowerPC` targets. +#[cfg(not(target_arch = "powerpc"))] +#[macro_export] +#[unstable(feature = "stdsimd", issue = "27731")] +macro_rules! is_powerpc_feature_detected { + ($t:tt) => { + compile_error!( + r#" +is_powerpc_feature_detected can only be used on PowerPC targets. +You can prevent it from being used in other architectures by +guarding it behind a cfg(target_arch) as follows: + + #[cfg(target_arch = "powerpc")] { + if is_powerpc_feature_detected(...) { ... } + } +"# + ) + }; +} + +/// Prevents compilation if `is_powerpc64_feature_detected` is used somewhere +/// else than `PowerPC64` targets. +#[cfg(not(target_arch = "powerpc64"))] +#[macro_export] +#[unstable(feature = "stdsimd", issue = "27731")] +macro_rules! is_powerpc64_feature_detected { + ($t:tt) => { + compile_error!( + r#" +is_powerpc64_feature_detected can only be used on PowerPC64 targets. 
+You can prevent it from being used in other architectures by +guarding it behind a cfg(target_arch) as follows: + + #[cfg(target_arch = "powerpc64")] { + if is_powerpc64_feature_detected(...) { ... } + } +"# + ) + }; +} + +/// Prevents compilation if `is_mips_feature_detected` is used somewhere else +/// than `MIPS` targets. +#[cfg(not(target_arch = "mips"))] +#[macro_export] +#[unstable(feature = "stdsimd", issue = "27731")] +macro_rules! is_mips_feature_detected { + ($t:tt) => { + compile_error!( + r#" + is_mips_feature_detected can only be used on MIPS targets. + You can prevent it from being used in other architectures by + guarding it behind a cfg(target_arch) as follows: + + #[cfg(target_arch = "mips")] { + if is_mips_feature_detected(...) { ... } + } + "# + ) + }; +} + +/// Prevents compilation if `is_mips64_feature_detected` is used somewhere else +/// than `MIPS64` targets. +#[cfg(not(target_arch = "mips64"))] +#[macro_export] +#[unstable(feature = "stdsimd", issue = "27731")] +macro_rules! is_mips64_feature_detected { + ($t:tt) => { + compile_error!( + r#" + is_mips64_feature_detected can only be used on MIPS64 targets. + You can prevent it from being used in other architectures by + guarding it behind a cfg(target_arch) as follows: + + #[cfg(target_arch = "mips64")] { + if is_mips64_feature_detected(...) { ... } + } + "# + ) + }; +} diff --git a/library/stdarch/crates/std_detect/src/detect/mod.rs b/library/stdarch/crates/std_detect/src/detect/mod.rs new file mode 100644 index 00000000000..f446e88eedc --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/mod.rs @@ -0,0 +1,85 @@ +//! This module implements run-time feature detection. +//! +//! The `is_{arch}_feature_detected!("feature-name")` macros take the name of a +//! feature as a string-literal, and return a boolean indicating whether the +//! feature is enabled at run-time or not. +//! +//! These macros do two things: +//! * map the string-literal into an integer stored as a `Feature` enum, +//! * call a `os::check_for(x: Feature)` function that returns `true` if the +//! feature is enabled. +//! +//! The `Feature` enums are also implemented in the `arch/{target_arch}.rs` +//! modules. +//! +//! The `check_for` functions are, in general, Operating System dependent. Most +//! architectures do not allow user-space programs to query the feature bits +//! due to security concerns (x86 is the big exception). These functions are +//! implemented in the `os/{target_os}.rs` modules. + +#[macro_use] +mod error_macros; + +cfg_if! { + if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] { + #[path = "arch/x86.rs"] + #[macro_use] + mod arch; + } else if #[cfg(target_arch = "arm")] { + #[path = "arch/arm.rs"] + #[macro_use] + mod arch; + } else if #[cfg(target_arch = "aarch64")] { + #[path = "arch/aarch64.rs"] + #[macro_use] + mod arch; + } else if #[cfg(target_arch = "powerpc")] { + #[path = "arch/powerpc.rs"] + #[macro_use] + mod arch; + } else if #[cfg(target_arch = "powerpc64")] { + #[path = "arch/powerpc64.rs"] + #[macro_use] + mod arch; + } else if #[cfg(target_arch = "mips")] { + #[path = "arch/mips.rs"] + #[macro_use] + mod arch; + } else if #[cfg(target_arch = "mips64")] { + #[path = "arch/mips64.rs"] + #[macro_use] + mod arch; + } else { + // Unimplemented architecture: + mod arch { + pub enum Feature { + Null + } + } + } +} +pub use self::arch::Feature; + +mod bit; +mod cache; + +cfg_if! 
{ + if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] { + // On x86/x86_64 no OS specific functionality is required. + #[path = "os/x86.rs"] + mod os; + } else if #[cfg(all(target_os = "linux", feature = "use_std"))] { + #[path = "os/linux/mod.rs"] + mod os; + } else if #[cfg(target_os = "freebsd")] { + #[cfg(target_arch = "aarch64")] + #[path = "os/aarch64.rs"] + mod aarch64; + #[path = "os/freebsd/mod.rs"] + mod os; + } else { + #[path = "os/other.rs"] + mod os; + } +} +pub use self::os::check_for; diff --git a/library/stdarch/crates/std_detect/src/detect/os/aarch64.rs b/library/stdarch/crates/std_detect/src/detect/os/aarch64.rs new file mode 100644 index 00000000000..f28d15a7c3e --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/os/aarch64.rs @@ -0,0 +1,79 @@ +//! Run-time feature detection for Aarch64 on any OS that emulates the mrs instruction. +//! +//! On FreeBSD >= 12.0, Linux >= 4.11 and other operating systems, it is possible to use +//! privileged system registers from userspace to check CPU feature support. +//! +//! AArch64 system registers ID_AA64ISAR0_EL1, ID_AA64PFR0_EL1, ID_AA64ISAR1_EL1 +//! have bits dedicated to features like AdvSIMD, CRC32, AES, atomics (LSE), etc. +//! Each part of the register indicates the level of support for a certain feature, e.g. +//! when ID_AA64ISAR0_EL1[7:4] is >= 1, AES is supported; when it's >= 2, PMULL is supported. +//! +//! For proper support of [SoCs where different cores have different capabilities](https://medium.com/@jadr2ddude/a-big-little-problem-a-tale-of-big-little-gone-wrong-e7778ce744bb), +//! the OS has to always report only the features supported by all cores, like [FreeBSD does](https://reviews.freebsd.org/D17137#393947). +//! +//! References: +//! +//! - [Zircon implementation](https://fuchsia.googlesource.com/zircon/+/master/kernel/arch/arm64/feature.cpp) +//! - [Linux documentation](https://www.kernel.org/doc/Documentation/arm64/cpu-feature-registers.txt) + +use crate::detect::{Feature, cache}; + +/// Try to read the features from the system registers. +/// +/// This will cause SIGILL if the current OS is not trapping the mrs instruction. 
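(Editorial aside, not part of this patch.) As a worked example of the field encoding described above, and mirroring the `bits_shift` helper used below: the AES/PMULL field of `ID_AA64ISAR0_EL1` occupies bits [7:4], where a value >= 1 means AES is implemented and >= 2 additionally means PMULL:

```rust
// Extract bits [high:low] of a system-register value (same math as bits_shift).
fn field(x: u64, high: u32, low: u32) -> u64 {
    (x >> low) & ((1 << (high - low + 1)) - 1)
}

fn aes_and_pmull(id_aa64isar0_el1: u64) -> (bool, bool) {
    let f = field(id_aa64isar0_el1, 7, 4); // (x >> 4) & 0xF
    (f >= 1, f >= 2)
}
```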
+pub(crate) fn detect_features() -> cache::Initializer { + let mut value = cache::Initializer::default(); + + { + let mut enable_feature = |f, enable| { + if enable { + value.set(f as u32); + } + }; + + // ID_AA64ISAR0_EL1 - Instruction Set Attribute Register 0 + let aa64isar0: u64; + unsafe { asm!("mrs $0, ID_AA64ISAR0_EL1" : "=r"(aa64isar0)); } + + let aes = bits_shift(aa64isar0, 7, 4) >= 1; + let pmull = bits_shift(aa64isar0, 7, 4) >= 2; + let sha1 = bits_shift(aa64isar0, 11, 8) >= 1; + let sha2 = bits_shift(aa64isar0, 15, 12) >= 1; + enable_feature(Feature::pmull, pmull); + // Crypto is specified as AES + PMULL + SHA1 + SHA2 per LLVM/hosts.cpp + enable_feature(Feature::crypto, aes && pmull && sha1 && sha2); + enable_feature(Feature::lse, bits_shift(aa64isar0, 23, 20) >= 1); + enable_feature(Feature::crc, bits_shift(aa64isar0, 19, 16) >= 1); + + // ID_AA64PFR0_EL1 - Processor Feature Register 0 + let aa64pfr0: u64; + unsafe { asm!("mrs $0, ID_AA64PFR0_EL1" : "=r"(aa64pfr0)); } + + let fp = bits_shift(aa64pfr0, 19, 16) < 0xF; + let fphp = bits_shift(aa64pfr0, 19, 16) >= 1; + let asimd = bits_shift(aa64pfr0, 23, 20) < 0xF; + let asimdhp = bits_shift(aa64pfr0, 23, 20) >= 1; + enable_feature(Feature::fp, fp); + enable_feature(Feature::fp16, fphp); + // SIMD support requires float support - if half-floats are + // supported, it also requires half-float support: + enable_feature(Feature::asimd, fp && asimd && (!fphp | asimdhp)); + // SIMD extensions require SIMD support: + enable_feature(Feature::rdm, asimd && bits_shift(aa64isar0, 31, 28) >= 1); + enable_feature(Feature::dotprod, asimd && bits_shift(aa64isar0, 47, 44) >= 1); + enable_feature(Feature::sve, asimd && bits_shift(aa64pfr0, 35, 32) >= 1); + + // ID_AA64ISAR1_EL1 - Instruction Set Attribute Register 1 + let aa64isar1: u64; + unsafe { asm!("mrs $0, ID_AA64ISAR1_EL1" : "=r"(aa64isar1)); } + + enable_feature(Feature::rcpc, bits_shift(aa64isar1, 23, 20) >= 1); + } + + value +} + +#[inline] +fn bits_shift(x: u64, high: usize, low: usize) -> u64 { + (x >> low) & ((1 << (high - low + 1)) - 1) +} diff --git a/library/stdarch/crates/std_detect/src/detect/os/freebsd/aarch64.rs b/library/stdarch/crates/std_detect/src/detect/os/freebsd/aarch64.rs new file mode 100644 index 00000000000..910d2f33b39 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/os/freebsd/aarch64.rs @@ -0,0 +1,28 @@ +//! Run-time feature detection for Aarch64 on FreeBSD. + +use crate::detect::{Feature, cache}; +use super::super::aarch64::detect_features; + +/// Performs run-time feature detection. 
+#[inline] +pub fn check_for(x: Feature) -> bool { + cache::test(x as u32, detect_features) +} + +#[cfg(test)] +mod tests { + #[test] + fn dump() { + println!("asimd: {:?}", is_aarch64_feature_detected!("asimd")); + println!("pmull: {:?}", is_aarch64_feature_detected!("pmull")); + println!("fp: {:?}", is_aarch64_feature_detected!("fp")); + println!("fp16: {:?}", is_aarch64_feature_detected!("fp16")); + println!("sve: {:?}", is_aarch64_feature_detected!("sve")); + println!("crc: {:?}", is_aarch64_feature_detected!("crc")); + println!("crypto: {:?}", is_aarch64_feature_detected!("crypto")); + println!("lse: {:?}", is_aarch64_feature_detected!("lse")); + println!("rdm: {:?}", is_aarch64_feature_detected!("rdm")); + println!("rcpc: {:?}", is_aarch64_feature_detected!("rcpc")); + println!("dotprod: {:?}", is_aarch64_feature_detected!("dotprod")); + } +} diff --git a/library/stdarch/crates/std_detect/src/detect/os/freebsd/mod.rs b/library/stdarch/crates/std_detect/src/detect/os/freebsd/mod.rs new file mode 100644 index 00000000000..1c73cefd47d --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/os/freebsd/mod.rs @@ -0,0 +1,14 @@ +//! Run-time feature detection on FreeBSD + +cfg_if! { + if #[cfg(target_arch = "aarch64")] { + mod aarch64; + pub use self::aarch64::check_for; + } else { + use arch::detect::Feature; + /// Performs run-time feature detection. + pub fn check_for(_x: Feature) -> bool { + false + } + } +} diff --git a/library/stdarch/crates/std_detect/src/detect/os/linux/aarch64.rs b/library/stdarch/crates/std_detect/src/detect/os/linux/aarch64.rs new file mode 100644 index 00000000000..f7dc0f0222e --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/os/linux/aarch64.rs @@ -0,0 +1,157 @@ +//! Run-time feature detection for Aarch64 on Linux. + +use crate::detect::{Feature, cache, bit}; +use super::{auxvec, cpuinfo}; + +/// Performs run-time feature detection. +#[inline] +pub fn check_for(x: Feature) -> bool { + cache::test(x as u32, detect_features) +} + +/// Try to read the features from the auxiliary vector, and if that fails, try +/// to read them from /proc/cpuinfo. +fn detect_features() -> cache::Initializer { + if let Ok(auxv) = auxvec::auxv() { + let hwcap: AtHwcap = auxv.into(); + return hwcap.cache(); + } + if let Ok(c) = cpuinfo::CpuInfo::new() { + let hwcap: AtHwcap = c.into(); + return hwcap.cache(); + } + cache::Initializer::default() +} + +/// These values are part of the platform-specific [asm/hwcap.h][hwcap] . +/// +/// [hwcap]: https://github.com/torvalds/linux/blob/master/arch/arm64/include/uapi/asm/hwcap.h +struct AtHwcap { + fp: bool, // 0 + asimd: bool, // 1 + // evtstrm: bool, // 2 + aes: bool, // 3 + pmull: bool, // 4 + sha1: bool, // 5 + sha2: bool, // 6 + crc32: bool, // 7 + atomics: bool, // 8 + fphp: bool, // 9 + asimdhp: bool, // 10 + // cpuid: bool, // 11 + asimdrdm: bool, // 12 + // jscvt: bool, // 13 + // fcma: bool, // 14 + lrcpc: bool, // 15 + // dcpop: bool, // 16 + // sha3: bool, // 17 + // sm3: bool, // 18 + // sm4: bool, // 19 + asimddp: bool, // 20 + // sha512: bool, // 21 + sve: bool, // 22 +} + +impl From for AtHwcap { + /// Reads AtHwcap from the auxiliary vector. 
+ fn from(auxv: auxvec::AuxVec) -> Self { + AtHwcap { + fp: bit::test(auxv.hwcap, 0), + asimd: bit::test(auxv.hwcap, 1), + // evtstrm: bit::test(auxv.hwcap, 2), + aes: bit::test(auxv.hwcap, 3), + pmull: bit::test(auxv.hwcap, 4), + sha1: bit::test(auxv.hwcap, 5), + sha2: bit::test(auxv.hwcap, 6), + crc32: bit::test(auxv.hwcap, 7), + atomics: bit::test(auxv.hwcap, 8), + fphp: bit::test(auxv.hwcap, 9), + asimdhp: bit::test(auxv.hwcap, 10), + // cpuid: bit::test(auxv.hwcap, 11), + asimdrdm: bit::test(auxv.hwcap, 12), + // jscvt: bit::test(auxv.hwcap, 13), + // fcma: bit::test(auxv.hwcap, 14), + lrcpc: bit::test(auxv.hwcap, 15), + // dcpop: bit::test(auxv.hwcap, 16), + // sha3: bit::test(auxv.hwcap, 17), + // sm3: bit::test(auxv.hwcap, 18), + // sm4: bit::test(auxv.hwcap, 19), + asimddp: bit::test(auxv.hwcap, 20), + // sha512: bit::test(auxv.hwcap, 21), + sve: bit::test(auxv.hwcap, 22), + } + } +} + +impl From for AtHwcap { + /// Reads AtHwcap from /proc/cpuinfo . + fn from(c: cpuinfo::CpuInfo) -> Self { + let f = &c.field("Features"); + AtHwcap { + // 64-bit names. FIXME: In 32-bit compatibility mode /proc/cpuinfo will + // map some of the 64-bit names to some 32-bit feature names. This does not + // cover that yet. + fp: f.has("fp"), + asimd: f.has("asimd"), + // evtstrm: f.has("evtstrm"), + aes: f.has("aes"), + pmull: f.has("pmull"), + sha1: f.has("sha1"), + sha2: f.has("sha2"), + crc32: f.has("crc32"), + atomics: f.has("atomics"), + fphp: f.has("fphp"), + asimdhp: f.has("asimdhp"), + // cpuid: f.has("cpuid"), + asimdrdm: f.has("asimdrdm"), + // jscvt: f.has("jscvt"), + // fcma: f.has("fcma"), + lrcpc: f.has("lrcpc"), + // dcpop: f.has("dcpop"), + // sha3: f.has("sha3"), + // sm3: f.has("sm3"), + // sm4: f.has("sm4"), + asimddp: f.has("asimddp"), + // sha512: f.has("sha512"), + sve: f.has("sve"), + } + } +} + +impl AtHwcap { + /// Initializes the cache from the feature -bits. + /// + /// The features are enabled approximately like in LLVM host feature detection: + /// https://github.com/llvm-mirror/llvm/blob/master/lib/Support/Host.cpp#L1273 + fn cache(self) -> cache::Initializer { + let mut value = cache::Initializer::default(); + { + let mut enable_feature = |f, enable| { + if enable { + value.set(f as u32); + } + }; + + enable_feature(Feature::fp, self.fp); + // Half-float support requires float support + enable_feature(Feature::fp16, self.fp && self.fphp); + enable_feature(Feature::pmull, self.pmull); + enable_feature(Feature::crc, self.crc32); + enable_feature(Feature::lse, self.atomics); + enable_feature(Feature::rcpc, self.lrcpc); + + // SIMD support requires float support - if half-floats are + // supported, it also requires half-float support: + let asimd = self.fp && self.asimd && (!self.fphp | self.asimdhp); + enable_feature(Feature::asimd, asimd); + // SIMD extensions require SIMD support: + enable_feature(Feature::rdm, self.asimdrdm && asimd); + enable_feature(Feature::dotprod, self.asimddp && asimd); + enable_feature(Feature::sve, self.sve && asimd); + + // Crypto is specified as AES + PMULL + SHA1 + SHA2 per LLVM/hosts.cpp + enable_feature(Feature::crypto, self.aes && self.pmull && self.sha1 && self.sha2); + } + value + } +} diff --git a/library/stdarch/crates/std_detect/src/detect/os/linux/arm.rs b/library/stdarch/crates/std_detect/src/detect/os/linux/arm.rs new file mode 100644 index 00000000000..0d58a847cd6 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/os/linux/arm.rs @@ -0,0 +1,49 @@ +//! Run-time feature detection for ARM on Linux. 
+ +use crate::detect::{Feature, cache, bit}; +use super::{auxvec, cpuinfo}; + +/// Performs run-time feature detection. +#[inline] +pub fn check_for(x: Feature) -> bool { + cache::test(x as u32, detect_features) +} + +/// Try to read the features from the auxiliary vector, and if that fails, try +/// to read them from /proc/cpuinfo. +fn detect_features() -> cache::Initializer { + let mut value = cache::Initializer::default(); + let enable_feature = |value: &mut cache::Initializer, f, enable| { + if enable { + value.set(f as u32); + } + }; + + // The values are part of the platform-specific [asm/hwcap.h][hwcap] + // + // [hwcap]: https://github.com/torvalds/linux/blob/master/arch/arm64/include/uapi/asm/hwcap.h + if let Ok(auxv) = auxvec::auxv() { + enable_feature(&mut value, Feature::neon, bit::test(auxv.hwcap, 12)); + enable_feature(&mut value, Feature::pmull, bit::test(auxv.hwcap2, 1)); + return value; + } + + if let Ok(c) = cpuinfo::CpuInfo::new() { + enable_feature(&mut value, Feature::neon, c.field("Features").has("neon") && + !has_broken_neon(&c)); + enable_feature(&mut value, Feature::pmull, c.field("Features").has("pmull")); + return value; + } + value +} + +/// Is the CPU known to have a broken NEON unit? +/// +/// See https://crbug.com/341598. +fn has_broken_neon(cpuinfo: &cpuinfo::CpuInfo) -> bool { + cpuinfo.field("CPU implementer") == "0x51" + && cpuinfo.field("CPU architecture") == "7" + && cpuinfo.field("CPU variant") == "0x1" + && cpuinfo.field("CPU part") == "0x04d" + && cpuinfo.field("CPU revision") == "0" +} diff --git a/library/stdarch/crates/std_detect/src/detect/os/linux/auxvec.rs b/library/stdarch/crates/std_detect/src/detect/os/linux/auxvec.rs new file mode 100644 index 00000000000..31c980fd382 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/os/linux/auxvec.rs @@ -0,0 +1,270 @@ +//! Parses ELF auxiliary vectors. +#![cfg_attr(not(target_arch = "aarch64"), allow(dead_code))] + +extern crate std; +use self::std::{prelude::v1::*, fs::File, io::Read}; + +use core::mem; + +/// Key to access the CPU Hardware capabilities bitfield. +pub(crate) const AT_HWCAP: usize = 16; +/// Key to access the CPU Hardware capabilities 2 bitfield. +#[cfg(any(target_arch = "arm", target_arch = "powerpc64"))] +pub(crate) const AT_HWCAP2: usize = 26; + +/// Cache HWCAP bitfields of the ELF Auxiliary Vector. +/// +/// If an entry cannot be read all the bits in the bitfield are set to zero. +/// This should be interpreted as all the features being disabled. +#[derive(Debug, Copy, Clone)] +pub(crate) struct AuxVec { + pub hwcap: usize, + #[cfg(any(target_arch = "arm", target_arch = "powerpc64"))] + pub hwcap2: usize, +} + +/// ELF Auxiliary Vector +/// +/// The auxiliary vector is a memory region in a running ELF program's stack +/// composed of (key: usize, value: usize) pairs. +/// +/// The keys used in the aux vector are platform dependent. For Linux, they are +/// defined in [linux/auxvec.h][auxvec_h]. The hardware capabilities of a given +/// CPU can be queried with the `AT_HWCAP` and `AT_HWCAP2` keys. +/// +/// There is no perfect way of reading the auxiliary vector. +/// +/// - If the `getauxval` is dynamically linked to this binary, it will be used. +/// - Otherwise, try to read `/proc/self/auxv`. +/// - If that fails, this function returns an error. +/// +/// Note that run-time feature detection is not invoked for features that can +/// be detected at compile-time. 
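(Editorial aside, not part of this patch.) On Linux targets whose libc actually exports `getauxval`, the same HWCAP value can be read directly through the `libc` crate instead of going through `dlsym`; `hwcap_via_libc` is a hypothetical helper, and the availability of `libc::getauxval`/`libc::AT_HWCAP` is an assumption about the target's libc:

```rust
// Sketch: getauxval returns 0 when the requested key is absent.
#[cfg(target_os = "linux")]
fn hwcap_via_libc() -> Option<usize> {
    let hwcap = unsafe { libc::getauxval(libc::AT_HWCAP) };
    if hwcap == 0 { None } else { Some(hwcap as usize) }
}
```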
Also note that if this function returns an +/// error, cpuinfo still can (and will) be used to try to perform run-time +/// feature detecton on some platforms. +/// +/// For more information about when `getauxval` is available check the great +/// [`auxv` crate documentation][auxv_docs]. +/// +/// [auxvec_h]: https://github.com/torvalds/linux/blob/master/include/uapi/linux/auxvec.h +/// [auxv_docs]: https://docs.rs/auxv/0.3.3/auxv/ +pub(crate) fn auxv() -> Result { + // Try to call a dynamically-linked getauxval function. + if let Ok(hwcap) = getauxval(AT_HWCAP) { + // Targets with only AT_HWCAP: + #[cfg(any(target_arch = "aarch64", target_arch = "mips", + target_arch = "mips64"))] + { + if hwcap != 0 { + return Ok(AuxVec { hwcap }); + } + } + + // Targets with AT_HWCAP and AT_HWCAP2: + #[cfg(any(target_arch = "arm", target_arch = "powerpc64"))] + { + if let Ok(hwcap2) = getauxval(AT_HWCAP2) { + if hwcap != 0 && hwcap2 != 0 { + return Ok(AuxVec { hwcap, hwcap2 }); + } + } + } + drop(hwcap); + } + // If calling getauxval fails, try to read the auxiliary vector from + // its file: + auxv_from_file("/proc/self/auxv") +} + +/// Tries to read the `key` from the auxiliary vector by calling the +/// dynamically-linked `getauxval` function. If the function is not linked, +/// this function return `Err`. +fn getauxval(key: usize) -> Result { + use libc; + pub type F = unsafe extern "C" fn(usize) -> usize; + unsafe { + let ptr = libc::dlsym( + libc::RTLD_DEFAULT, + "getauxval\0".as_ptr() as *const _, + ); + if ptr.is_null() { + return Err(()); + } + + let ffi_getauxval: F = mem::transmute(ptr); + Ok(ffi_getauxval(key)) + } +} + +/// Tries to read the auxiliary vector from the `file`. If this fails, this +/// function returns `Err`. +fn auxv_from_file(file: &str) -> Result { + let mut file = File::open(file).map_err(|_| ())?; + + // See https://github.com/torvalds/linux/blob/v3.19/include/uapi/linux/auxvec.h + // + // The auxiliary vector contains at most 32 (key,value) fields: from + // `AT_EXECFN = 31` to `AT_NULL = 0`. That is, a buffer of + // 2*32 `usize` elements is enough to read the whole vector. + let mut buf = [0_usize; 64]; + { + let raw: &mut [u8; 64 * mem::size_of::()] = + unsafe { mem::transmute(&mut buf) }; + file.read(raw).map_err(|_| ())?; + } + auxv_from_buf(&buf) +} + +/// Tries to interpret the `buffer` as an auxiliary vector. If that fails, this +/// function returns `Err`. +fn auxv_from_buf(buf: &[usize; 64]) -> Result { + // Targets with only AT_HWCAP: + #[cfg(any(target_arch = "aarch64", target_arch = "mips", + target_arch = "mips64"))] + { + for el in buf.chunks(2) { + match el[0] { + AT_HWCAP => return Ok(AuxVec { hwcap: el[1] }), + _ => (), + } + } + } + // Targets with AT_HWCAP and AT_HWCAP2: + #[cfg(any(target_arch = "arm", target_arch = "powerpc64"))] + { + let mut hwcap = None; + let mut hwcap2 = None; + for el in buf.chunks(2) { + match el[0] { + AT_HWCAP => hwcap = Some(el[1]), + AT_HWCAP2 => hwcap2 = Some(el[1]), + _ => (), + } + } + + if let (Some(hwcap), Some(hwcap2)) = (hwcap, hwcap2) { + return Ok(AuxVec { hwcap, hwcap2 }); + } + } + drop(buf); + Err(()) +} + +#[cfg(test)] +mod tests { + extern crate auxv as auxv_crate; + use super::*; + + // Reads the Auxiliary Vector key from /proc/self/auxv + // using the auxv crate. 
+ fn auxv_crate_getprocfs(key: usize) -> Option { + use self::auxv_crate::AuxvType; + use self::auxv_crate::procfs::search_procfs_auxv; + let k = key as AuxvType; + match search_procfs_auxv(&[k]) { + Ok(v) => Some(v[&k] as usize), + Err(_) => None, + } + } + + // Reads the Auxiliary Vector key from getauxval() + // using the auxv crate. + #[cfg(not(any(target_arch = "mips", target_arch = "mips64")))] + fn auxv_crate_getauxval(key: usize) -> Option { + use self::auxv_crate::AuxvType; + use self::auxv_crate::getauxval::Getauxval; + let q = auxv_crate::getauxval::NativeGetauxval {}; + match q.getauxval(key as AuxvType) { + Ok(v) => Some(v as usize), + Err(_) => None, + } + } + + // FIXME: on mips/mips64 getauxval returns 0, and /proc/self/auxv + // does not always contain the AT_HWCAP key under qemu. + #[cfg(not(any(target_arch = "mips", target_arch = "mips64", target_arch = "powerpc")))] + #[test] + fn auxv_crate() { + let v = auxv(); + if let Some(hwcap) = auxv_crate_getauxval(AT_HWCAP) { + let rt_hwcap = v.expect("failed to find hwcap key").hwcap; + assert_eq!(rt_hwcap, hwcap); + } + + // Targets with AT_HWCAP and AT_HWCAP2: + #[cfg(any(target_arch = "arm", target_arch = "powerpc64"))] + { + if let Some(hwcap2) = auxv_crate_getauxval(AT_HWCAP2) { + let rt_hwcap2 = v.expect("failed to find hwcap2 key").hwcap2; + assert_eq!(rt_hwcap2, hwcap2); + } + } + } + + #[test] + fn auxv_dump() { + if let Ok(auxvec) = auxv() { + println!("{:?}", auxvec); + } else { + println!("both getauxval() and reading /proc/self/auxv failed!"); + } + } + + cfg_if! { + if #[cfg(target_arch = "arm")] { + #[test] + fn linux_rpi3() { + let file = concat!(env!("CARGO_MANIFEST_DIR"), "/src/detect/test_data/linux-rpi3.auxv"); + println!("file: {}", file); + let v = auxv_from_file(file).unwrap(); + assert_eq!(v.hwcap, 4174038); + assert_eq!(v.hwcap2, 16); + } + + #[test] + #[should_panic] + fn linux_macos_vb() { + let file = concat!(env!("CARGO_MANIFEST_DIR"), "/src/detect/test_data/macos-virtualbox-linux-x86-4850HQ.auxv"); + println!("file: {}", file); + let v = auxv_from_file(file).unwrap(); + // this file is incomplete (contains hwcap but not hwcap2), we + // want to fall back to /proc/cpuinfo in this case, so + // reading should fail. assert_eq!(v.hwcap, 126614527); + // assert_eq!(v.hwcap2, 0); + } + } else if #[cfg(target_arch = "aarch64")] { + #[test] + fn linux_x64() { + let file = concat!(env!("CARGO_MANIFEST_DIR"), "/src/detect/test_data/linux-x64-i7-6850k.auxv"); + println!("file: {}", file); + let v = auxv_from_file(file).unwrap(); + assert_eq!(v.hwcap, 3219913727); + } + } + } + + #[test] + fn auxv_dump_procfs() { + if let Ok(auxvec) = auxv_from_file("/proc/self/auxv") { + println!("{:?}", auxvec); + } else { + println!("reading /proc/self/auxv failed!"); + } + } + + #[test] + fn auxv_crate_procfs() { + let v = auxv(); + if let Some(hwcap) = auxv_crate_getprocfs(AT_HWCAP) { + assert_eq!(v.unwrap().hwcap, hwcap); + } + + // Targets with AT_HWCAP and AT_HWCAP2: + #[cfg(any(target_arch = "arm", target_arch = "powerpc64"))] + { + if let Some(hwcap2) = auxv_crate_getprocfs(AT_HWCAP2) { + assert_eq!(v.unwrap().hwcap2, hwcap2); + } + } + } +} diff --git a/library/stdarch/crates/std_detect/src/detect/os/linux/cpuinfo.rs b/library/stdarch/crates/std_detect/src/detect/os/linux/cpuinfo.rs new file mode 100644 index 00000000000..b3168578537 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/os/linux/cpuinfo.rs @@ -0,0 +1,301 @@ +//! 
Parses /proc/cpuinfo +#![cfg_attr(not(target_arch = "arm"), allow(dead_code))] + +extern crate std; +use self::std::{prelude::v1::*, fs::File, io, io::Read}; + +/// cpuinfo +pub(crate) struct CpuInfo { + raw: String, +} + +impl CpuInfo { + /// Reads /proc/cpuinfo into CpuInfo. + pub(crate) fn new() -> Result { + let mut file = File::open("/proc/cpuinfo")?; + let mut cpui = Self { raw: String::new() }; + file.read_to_string(&mut cpui.raw)?; + Ok(cpui) + } + /// Returns the value of the cpuinfo `field`. + pub(crate) fn field(&self, field: &str) -> CpuInfoField { + for l in self.raw.lines() { + if l.trim().starts_with(field) { + return CpuInfoField::new(l.split(": ").nth(1)); + } + } + CpuInfoField(None) + } + + /// Returns the `raw` contents of `/proc/cpuinfo` + #[cfg(test)] + fn raw(&self) -> &String { + &self.raw + } + + #[cfg(test)] + fn from_str(other: &str) -> Result { + Ok(Self { + raw: String::from(other), + }) + } +} + +/// Field of cpuinfo +#[derive(Debug)] +pub(crate) struct CpuInfoField<'a>(Option<&'a str>); + +impl<'a> PartialEq<&'a str> for CpuInfoField<'a> { + fn eq(&self, other: &&'a str) -> bool { + match self.0 { + None => other.is_empty(), + Some(f) => f == other.trim(), + } + } +} + +impl<'a> CpuInfoField<'a> { + pub(crate) fn new<'b>(v: Option<&'b str>) -> CpuInfoField<'b> { + match v { + None => CpuInfoField::<'b>(None), + Some(f) => CpuInfoField::<'b>(Some(f.trim())), + } + } + /// Does the field exist? + #[cfg(test)] + pub(crate) fn exists(&self) -> bool { + self.0.is_some() + } + /// Does the field contain `other`? + pub(crate) fn has(&self, other: &str) -> bool { + match self.0 { + None => other.is_empty(), + Some(f) => { + let other = other.trim(); + for v in f.split(' ') { + if v == other { + return true; + } + } + false + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn raw_dump() { + let cpuinfo = CpuInfo::new().unwrap(); + if cpuinfo.field("vendor_id") == "GenuineIntel" { + assert!(cpuinfo.field("flags").exists()); + assert!(!cpuinfo.field("vendor33_id").exists()); + assert!(cpuinfo.field("flags").has("sse")); + assert!(!cpuinfo.field("flags").has("avx314")); + } + println!("{}", cpuinfo.raw()); + } + + const CORE_DUO_T6500: &str = r"processor : 0 +vendor_id : GenuineIntel +cpu family : 6 +model : 23 +model name : Intel(R) Core(TM)2 Duo CPU T6500 @ 2.10GHz +stepping : 10 +microcode : 0xa0b +cpu MHz : 1600.000 +cache size : 2048 KB +physical id : 0 +siblings : 2 +core id : 0 +cpu cores : 2 +apicid : 0 +initial apicid : 0 +fdiv_bug : no +hlt_bug : no +f00f_bug : no +coma_bug : no +fpu : yes +fpu_exception : yes +cpuid level : 13 +wp : yes +flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe nx lm constant_tsc arch_perfmon pebs bts aperfmperf pni dtes64 monitor ds_cpl est tm2 ssse3 cx16 xtpr pdcm sse4_1 xsave lahf_lm dtherm +bogomips : 4190.43 +clflush size : 64 +cache_alignment : 64 +address sizes : 36 bits physical, 48 bits virtual +power management: +"; + + #[test] + fn core_duo_t6500() { + let cpuinfo = CpuInfo::from_str(CORE_DUO_T6500).unwrap(); + assert_eq!(cpuinfo.field("vendor_id"), "GenuineIntel"); + assert_eq!(cpuinfo.field("cpu family"), "6"); + assert_eq!(cpuinfo.field("model"), "23"); + assert_eq!( + cpuinfo.field("model name"), + "Intel(R) Core(TM)2 Duo CPU T6500 @ 2.10GHz" + ); + assert_eq!( + cpuinfo.field("flags"), + "fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe nx lm 
constant_tsc arch_perfmon pebs bts aperfmperf pni dtes64 monitor ds_cpl est tm2 ssse3 cx16 xtpr pdcm sse4_1 xsave lahf_lm dtherm" + ); + assert!(cpuinfo.field("flags").has("fpu")); + assert!(cpuinfo.field("flags").has("dtherm")); + assert!(cpuinfo.field("flags").has("sse2")); + assert!(!cpuinfo.field("flags").has("avx")); + } + + const ARM_CORTEX_A53: &str = + r"Processor : AArch64 Processor rev 3 (aarch64) + processor : 0 + processor : 1 + processor : 2 + processor : 3 + processor : 4 + processor : 5 + processor : 6 + processor : 7 + Features : fp asimd evtstrm aes pmull sha1 sha2 crc32 + CPU implementer : 0x41 + CPU architecture: AArch64 + CPU variant : 0x0 + CPU part : 0xd03 + CPU revision : 3 + + Hardware : HiKey Development Board + "; + + #[test] + fn arm_cortex_a53() { + let cpuinfo = CpuInfo::from_str(ARM_CORTEX_A53).unwrap(); + assert_eq!( + cpuinfo.field("Processor"), + "AArch64 Processor rev 3 (aarch64)" + ); + assert_eq!( + cpuinfo.field("Features"), + "fp asimd evtstrm aes pmull sha1 sha2 crc32" + ); + assert!(cpuinfo.field("Features").has("pmull")); + assert!(!cpuinfo.field("Features").has("neon")); + assert!(cpuinfo.field("Features").has("asimd")); + } + + const ARM_CORTEX_A57: &str = r"Processor : Cortex A57 Processor rev 1 (aarch64) +processor : 0 +processor : 1 +processor : 2 +processor : 3 +Features : fp asimd aes pmull sha1 sha2 crc32 wp half thumb fastmult vfp edsp neon vfpv3 tlsi vfpv4 idiva idivt +CPU implementer : 0x41 +CPU architecture: 8 +CPU variant : 0x1 +CPU part : 0xd07 +CPU revision : 1"; + + #[test] + fn arm_cortex_a57() { + let cpuinfo = CpuInfo::from_str(ARM_CORTEX_A57).unwrap(); + assert_eq!( + cpuinfo.field("Processor"), + "Cortex A57 Processor rev 1 (aarch64)" + ); + assert_eq!( + cpuinfo.field("Features"), + "fp asimd aes pmull sha1 sha2 crc32 wp half thumb fastmult vfp edsp neon vfpv3 tlsi vfpv4 idiva idivt" + ); + assert!(cpuinfo.field("Features").has("pmull")); + assert!(cpuinfo.field("Features").has("neon")); + assert!(cpuinfo.field("Features").has("asimd")); + } + + const POWER8E_POWERKVM: &str = r"processor : 0 +cpu : POWER8E (raw), altivec supported +clock : 3425.000000MHz +revision : 2.1 (pvr 004b 0201) + +processor : 1 +cpu : POWER8E (raw), altivec supported +clock : 3425.000000MHz +revision : 2.1 (pvr 004b 0201) + +processor : 2 +cpu : POWER8E (raw), altivec supported +clock : 3425.000000MHz +revision : 2.1 (pvr 004b 0201) + +processor : 3 +cpu : POWER8E (raw), altivec supported +clock : 3425.000000MHz +revision : 2.1 (pvr 004b 0201) + +timebase : 512000000 +platform : pSeries +model : IBM pSeries (emulated by qemu) +machine : CHRP IBM pSeries (emulated by qemu)"; + + #[test] + fn power8_powerkvm() { + let cpuinfo = CpuInfo::from_str(POWER8E_POWERKVM).unwrap(); + assert_eq!(cpuinfo.field("cpu"), "POWER8E (raw), altivec supported"); + + assert!(cpuinfo.field("cpu").has("altivec")); + } + + const POWER5P: &str = r"processor : 0 +cpu : POWER5+ (gs) +clock : 1900.098000MHz +revision : 2.1 (pvr 003b 0201) + +processor : 1 +cpu : POWER5+ (gs) +clock : 1900.098000MHz +revision : 2.1 (pvr 003b 0201) + +processor : 2 +cpu : POWER5+ (gs) +clock : 1900.098000MHz +revision : 2.1 (pvr 003b 0201) + +processor : 3 +cpu : POWER5+ (gs) +clock : 1900.098000MHz +revision : 2.1 (pvr 003b 0201) + +processor : 4 +cpu : POWER5+ (gs) +clock : 1900.098000MHz +revision : 2.1 (pvr 003b 0201) + +processor : 5 +cpu : POWER5+ (gs) +clock : 1900.098000MHz +revision : 2.1 (pvr 003b 0201) + +processor : 6 +cpu : POWER5+ (gs) +clock : 1900.098000MHz +revision : 2.1 (pvr 003b 
0201) + +processor : 7 +cpu : POWER5+ (gs) +clock : 1900.098000MHz +revision : 2.1 (pvr 003b 0201) + +timebase : 237331000 +platform : pSeries +machine : CHRP IBM,9133-55A"; + + #[test] + fn power5p() { + let cpuinfo = CpuInfo::from_str(POWER5P).unwrap(); + assert_eq!(cpuinfo.field("cpu"), "POWER5+ (gs)"); + + assert!(!cpuinfo.field("cpu").has("altivec")); + } +} diff --git a/library/stdarch/crates/std_detect/src/detect/os/linux/mips.rs b/library/stdarch/crates/std_detect/src/detect/os/linux/mips.rs new file mode 100644 index 00000000000..7c180326feb --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/os/linux/mips.rs @@ -0,0 +1,31 @@ +//! Run-time feature detection for MIPS on Linux. + +use crate::detect::{Feature, cache, bit}; +use super::auxvec; + +/// Performs run-time feature detection. +#[inline] +pub fn check_for(x: Feature) -> bool { + cache::test(x as u32, detect_features) +} + +/// Try to read the features from the auxiliary vector, and if that fails, try +/// to read them from /proc/cpuinfo. +fn detect_features() -> cache::Initializer { + let mut value = cache::Initializer::default(); + let enable_feature = |value: &mut cache::Initializer, f, enable| { + if enable { + value.set(f as u32); + } + }; + + // The values are part of the platform-specific [asm/hwcap.h][hwcap] + // + // [hwcap]: https://github.com/torvalds/linux/blob/master/arch/arm64/include/uapi/asm/hwcap.h + if let Ok(auxv) = auxvec::auxv() { + enable_feature(&mut value, Feature::msa, bit::test(auxv.hwcap, 1)); + return value; + } + // TODO: fall back via cpuinfo + value +} diff --git a/library/stdarch/crates/std_detect/src/detect/os/linux/mod.rs b/library/stdarch/crates/std_detect/src/detect/os/linux/mod.rs new file mode 100644 index 00000000000..642dfb46571 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/os/linux/mod.rs @@ -0,0 +1,26 @@ +//! Run-time feature detection on Linux + +mod auxvec; +mod cpuinfo; + +cfg_if! { + if #[cfg(target_arch = "aarch64")] { + mod aarch64; + pub use self::aarch64::check_for; + } else if #[cfg(target_arch = "arm")] { + mod arm; + pub use self::arm::check_for; + } else if #[cfg(any(target_arch = "mips", target_arch = "mips64"))] { + mod mips; + pub use self::mips::check_for; + } else if #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] { + mod powerpc; + pub use self::powerpc::check_for; + } else { + use crate::detect::Feature; + /// Performs run-time feature detection. + pub fn check_for(_x: Feature) -> bool { + false + } + } +} diff --git a/library/stdarch/crates/std_detect/src/detect/os/linux/powerpc.rs b/library/stdarch/crates/std_detect/src/detect/os/linux/powerpc.rs new file mode 100644 index 00000000000..0022a7db983 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/os/linux/powerpc.rs @@ -0,0 +1,41 @@ +//! Run-time feature detection for PowerPC on Linux. + +use crate::detect::{Feature, cache}; +use super::{auxvec, cpuinfo}; + +/// Performs run-time feature detection. +#[inline] +pub fn check_for(x: Feature) -> bool { + cache::test(x as u32, detect_features) +} + +/// Try to read the features from the auxiliary vector, and if that fails, try +/// to read them from /proc/cpuinfo. 
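// Editorial sketch, not part of this patch: the /proc/cpuinfo fallback used by
// the Linux detectors boils down to querying the `CpuInfo` parser from
// cpuinfo.rs above. `has_cpuinfo_flag` is an illustrative helper, not an API
// of this crate; on ARM one would call it as
// `has_cpuinfo_flag("Features", "neon")`, and the PowerPC code below does the
// equivalent with the "cpu" field and "altivec".
fn has_cpuinfo_flag(field: &str, flag: &str) -> bool {
    match cpuinfo::CpuInfo::new() {
        Ok(c) => c.field(field).has(flag),
        Err(_) => false,
    }
}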
+fn detect_features() -> cache::Initializer { + let mut value = cache::Initializer::default(); + let enable_feature = |value: &mut cache::Initializer, f, enable| { + if enable { + value.set(f as u32); + } + }; + + // The values are part of the platform-specific [asm/cputable.h][cputable] + // + // [cputable]: https://github.com/torvalds/linux/blob/master/arch/powerpc/include/uapi/asm/cputable.h + if let Ok(auxv) = auxvec::auxv() { + // note: the PowerPC values are the mask to do the test (instead of the + // index of the bit to test like in ARM and Aarch64) + enable_feature(&mut value, Feature::altivec, auxv.hwcap & 0x10000000 != 0); + enable_feature(&mut value, Feature::vsx, auxv.hwcap & 0x00000080 != 0); + enable_feature(&mut value, Feature::power8, auxv.hwcap & 0x80000000 != 0); + return value; + } + + // PowerPC's /proc/cpuinfo lacks a proper Feature field, + // but `altivec` support is indicated in the `cpu` field. + if let Ok(c) = cpuinfo::CpuInfo::new() { + enable_feature(&mut value, Feature::altivec, c.field("cpu").has("altivec")); + return value; + } + value +} diff --git a/library/stdarch/crates/std_detect/src/detect/os/other.rs b/library/stdarch/crates/std_detect/src/detect/os/other.rs new file mode 100644 index 00000000000..23e399ea790 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/os/other.rs @@ -0,0 +1,9 @@ +//! Other operating systems + +use crate::detect::Feature; + +/// Performs run-time feature detection. +#[inline] +pub fn check_for(_x: Feature) -> bool { + false +} diff --git a/library/stdarch/crates/std_detect/src/detect/os/x86.rs b/library/stdarch/crates/std_detect/src/detect/os/x86.rs new file mode 100644 index 00000000000..9237d5dc0a5 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/os/x86.rs @@ -0,0 +1,357 @@ +//! x86 run-time feature detection is OS independent. + +use core::{prelude::v1::*, mem}; +#[cfg(target_arch = "x86")] +use core::arch::x86::*; +#[cfg(target_arch = "x86_64")] +use core::arch::x86_64::*; + +use crate::detect::{Feature, cache, bit}; + +/// Performs run-time feature detection. +#[inline] +pub fn check_for(x: Feature) -> bool { + cache::test(x as u32, detect_features) +} + +/// Run-time feature detection on x86 works by using the CPUID instruction. +/// +/// The [CPUID Wikipedia page][wiki_cpuid] contains +/// all the information about which flags to set to query which values, and in +/// which registers these are reported. +/// +/// The definitive references are: +/// - [Intel 64 and IA-32 Architectures Software Developer's Manual Volume 2: +/// Instruction Set Reference, A-Z][intel64_ref]. +/// - [AMD64 Architecture Programmer's Manual, Volume 3: General-Purpose and +/// System Instructions][amd64_ref]. +/// +/// [wiki_cpuid]: https://en.wikipedia.org/wiki/CPUID +/// [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf +/// [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf +#[cfg_attr(feature = "cargo-clippy", allow(clippy::similar_names))] +fn detect_features() -> cache::Initializer { + let mut value = cache::Initializer::default(); + + // If the x86 CPU does not support the CPUID instruction then it is too + // old to support any of the currently-detectable features. + if !has_cpuid() { + return value; + } + + // Calling `__cpuid`/`__cpuid_count` from here on is safe because the CPU + // has `cpuid` support. + + // 0. 
EAX = 0: Basic Information: + // - EAX returns the "Highest Function Parameter", that is, the maximum + // leaf value for subsequent calls of `cpuinfo` in range [0, + // 0x8000_0000]. - The vendor ID is stored in 12 u8 ascii chars, + // returned in EBX, EDX, and ECX (in that order): + let (max_basic_leaf, vendor_id) = unsafe { + let CpuidResult { + eax: max_basic_leaf, + ebx, + ecx, + edx, + } = __cpuid(0); + let vendor_id: [[u8; 4]; 3] = [ + mem::transmute(ebx), + mem::transmute(edx), + mem::transmute(ecx), + ]; + let vendor_id: [u8; 12] = mem::transmute(vendor_id); + (max_basic_leaf, vendor_id) + }; + + if max_basic_leaf < 1 { + // Earlier Intel 486, CPUID not implemented + return value; + } + + // EAX = 1, ECX = 0: Queries "Processor Info and Feature Bits"; + // Contains information about most x86 features. + let CpuidResult { + ecx: proc_info_ecx, + edx: proc_info_edx, + .. + } = unsafe { __cpuid(0x0000_0001_u32) }; + + // EAX = 7, ECX = 0: Queries "Extended Features"; + // Contains information about bmi,bmi2, and avx2 support. + let (extended_features_ebx, extended_features_ecx) = if max_basic_leaf >= 7 + { + let CpuidResult { ebx, ecx, .. } = unsafe { __cpuid(0x0000_0007_u32) }; + (ebx, ecx) + } else { + (0, 0) // CPUID does not support "Extended Features" + }; + + // EAX = 0x8000_0000, ECX = 0: Get Highest Extended Function Supported + // - EAX returns the max leaf value for extended information, that is, + // `cpuid` calls in range [0x8000_0000; u32::MAX]: + let CpuidResult { + eax: extended_max_basic_leaf, + .. + } = unsafe { __cpuid(0x8000_0000_u32) }; + + // EAX = 0x8000_0001, ECX=0: Queries "Extended Processor Info and Feature + // Bits" + let extended_proc_info_ecx = if extended_max_basic_leaf >= 1 { + let CpuidResult { ecx, .. } = unsafe { __cpuid(0x8000_0001_u32) }; + ecx + } else { + 0 + }; + + { + // borrows value till the end of this scope: + let mut enable = |r, rb, f| { + if bit::test(r as usize, rb) { + value.set(f as u32); + } + }; + + enable(proc_info_ecx, 0, Feature::sse3); + enable(proc_info_ecx, 9, Feature::ssse3); + enable(proc_info_ecx, 13, Feature::cmpxchg16b); + enable(proc_info_ecx, 19, Feature::sse4_1); + enable(proc_info_ecx, 20, Feature::sse4_2); + enable(proc_info_ecx, 23, Feature::popcnt); + enable(proc_info_ecx, 25, Feature::aes); + enable(proc_info_ecx, 1, Feature::pclmulqdq); + enable(proc_info_ecx, 30, Feature::rdrand); + enable(extended_features_ebx, 18, Feature::rdseed); + enable(extended_features_ebx, 19, Feature::adx); + enable(proc_info_edx, 4, Feature::tsc); + enable(proc_info_edx, 23, Feature::mmx); + enable(proc_info_edx, 24, Feature::fxsr); + enable(proc_info_edx, 25, Feature::sse); + enable(proc_info_edx, 26, Feature::sse2); + enable(extended_features_ebx, 29, Feature::sha); + + enable(extended_features_ebx, 3, Feature::bmi); + enable(extended_features_ebx, 8, Feature::bmi2); + + // `XSAVE` and `AVX` support: + let cpu_xsave = bit::test(proc_info_ecx as usize, 26); + if cpu_xsave { + // 0. Here the CPU supports `XSAVE`. + + // 1. Detect `OSXSAVE`, that is, whether the OS is AVX enabled and + // supports saving the state of the AVX/AVX2 vector registers on + // context-switches, see: + // + // - [intel: is avx enabled?][is_avx_enabled], + // - [mozilla: sse.cpp][mozilla_sse_cpp]. 
+ // + // [is_avx_enabled]: https://software.intel.com/en-us/blogs/2011/04/14/is-avx-enabled + // [mozilla_sse_cpp]: https://hg.mozilla.org/mozilla-central/file/64bab5cbb9b6/mozglue/build/SSE.cpp#l190 + let cpu_osxsave = bit::test(proc_info_ecx as usize, 27); + + if cpu_osxsave { + // 2. The OS must have signaled the CPU that it supports saving and + // restoring the: + // + // * SSE -> `XCR0.SSE[1]` + // * AVX -> `XCR0.AVX[2]` + // * AVX-512 -> `XCR0.AVX-512[7:5]`. + // + // by setting the corresponding bits of `XCR0` to `1`. + // + // This is safe because the CPU supports `xsave` + // and the OS has set `osxsave`. + let xcr0 = unsafe { _xgetbv(0) }; + // Test `XCR0.SSE[1]` and `XCR0.AVX[2]` with the mask `0b110 == 6`: + let os_avx_support = xcr0 & 6 == 6; + // Test `XCR0.AVX-512[7:5]` with the mask `0b1110_0000 == 224`: + let os_avx512_support = xcr0 & 224 == 224; + + // Only if the OS and the CPU support saving/restoring the AVX + // registers we enable `xsave` support: + if os_avx_support { + // See "13.3 ENABLING THE XSAVE FEATURE SET AND XSAVE-ENABLED + // FEATURES" in the "Intel® 64 and IA-32 Architectures Software + // Developer’s Manual, Volume 1: Basic Architecture": + // + // "Software enables the XSAVE feature set by setting + // CR4.OSXSAVE[bit 18] to 1 (e.g., with the MOV to CR4 + // instruction). If this bit is 0, execution of any of XGETBV, + // XRSTOR, XRSTORS, XSAVE, XSAVEC, XSAVEOPT, XSAVES, and XSETBV + // causes an invalid-opcode exception (#UD)" + // + enable(proc_info_ecx, 26, Feature::xsave); + + // For `xsaveopt`, `xsavec`, and `xsaves` we need to query: + // Processor Extended State Enumeration Sub-leaf (EAX = 0DH, + // ECX = 1): + if max_basic_leaf >= 0xd { + let CpuidResult { + eax: proc_extended_state1_eax, + .. + } = unsafe { __cpuid_count(0xd_u32, 1) }; + enable(proc_extended_state1_eax, 0, Feature::xsaveopt); + enable(proc_extended_state1_eax, 1, Feature::xsavec); + enable(proc_extended_state1_eax, 3, Feature::xsaves); + } + + // FMA (uses 256-bit wide registers): + enable(proc_info_ecx, 12, Feature::fma); + + // And AVX/AVX2: + enable(proc_info_ecx, 28, Feature::avx); + enable(extended_features_ebx, 5, Feature::avx2); + + // For AVX-512 the OS also needs to support saving/restoring + // the extended state, only then we enable AVX-512 support: + if os_avx512_support { + enable(extended_features_ebx, 16, Feature::avx512f); + enable(extended_features_ebx, 17, Feature::avx512dq); + enable(extended_features_ebx, 21, Feature::avx512_ifma); + enable(extended_features_ebx, 26, Feature::avx512pf); + enable(extended_features_ebx, 27, Feature::avx512er); + enable(extended_features_ebx, 28, Feature::avx512cd); + enable(extended_features_ebx, 30, Feature::avx512bw); + enable(extended_features_ebx, 31, Feature::avx512vl); + enable(extended_features_ecx, 1, Feature::avx512_vbmi); + enable( + extended_features_ecx, + 14, + Feature::avx512_vpopcntdq, + ); + } + } + } + } + + // This detects ABM on AMD CPUs and LZCNT on Intel CPUs. + // On intel CPUs with popcnt, lzcnt implements the + // "missing part" of ABM, so we map both to the same + // internal feature. + // + // The `is_x86_feature_detected!("lzcnt")` macro then + // internally maps to Feature::abm. 
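// Editorial sketch, not part of this patch: the feature bits recorded by this
// routine are ultimately consumed through `is_x86_feature_detected!`. The
// usual pattern pairs that macro with a `#[target_feature]` function plus a
// scalar fallback; `sum_avx2`/`sum` are hypothetical examples, not items
// defined in this crate.
#[target_feature(enable = "avx2")]
unsafe fn sum_avx2(xs: &[u32]) -> u32 {
    // The compiler is free to vectorize this body with AVX2 instructions.
    xs.iter().sum()
}

fn sum(xs: &[u32]) -> u32 {
    if is_x86_feature_detected!("avx2") {
        // Sound: we just verified at run time that AVX2 is available.
        unsafe { sum_avx2(xs) }
    } else {
        xs.iter().sum()
    }
}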
+ enable(extended_proc_info_ecx, 5, Feature::abm); + if vendor_id == *b"AuthenticAMD" { + // These features are only available on AMD CPUs: + enable(extended_proc_info_ecx, 6, Feature::sse4a); + enable(extended_proc_info_ecx, 21, Feature::tbm); + } + } + + value +} + +#[cfg(test)] +mod tests { + extern crate cupid; + + #[test] + fn dump() { + println!("aes: {:?}", is_x86_feature_detected!("aes")); + println!("pclmulqdq: {:?}", is_x86_feature_detected!("pclmulqdq")); + println!("rdrand: {:?}", is_x86_feature_detected!("rdrand")); + println!("rdseed: {:?}", is_x86_feature_detected!("rdseed")); + println!("tsc: {:?}", is_x86_feature_detected!("tsc")); + println!("sse: {:?}", is_x86_feature_detected!("sse")); + println!("sse2: {:?}", is_x86_feature_detected!("sse2")); + println!("sse3: {:?}", is_x86_feature_detected!("sse3")); + println!("ssse3: {:?}", is_x86_feature_detected!("ssse3")); + println!("sse4.1: {:?}", is_x86_feature_detected!("sse4.1")); + println!("sse4.2: {:?}", is_x86_feature_detected!("sse4.2")); + println!("sse4a: {:?}", is_x86_feature_detected!("sse4a")); + println!("sha: {:?}", is_x86_feature_detected!("sha")); + println!("avx: {:?}", is_x86_feature_detected!("avx")); + println!("avx2: {:?}", is_x86_feature_detected!("avx2")); + println!("avx512f {:?}", is_x86_feature_detected!("avx512f")); + println!("avx512cd {:?}", is_x86_feature_detected!("avx512cd")); + println!("avx512er {:?}", is_x86_feature_detected!("avx512er")); + println!("avx512pf {:?}", is_x86_feature_detected!("avx512pf")); + println!("avx512bw {:?}", is_x86_feature_detected!("avx512bw")); + println!("avx512dq {:?}", is_x86_feature_detected!("avx512dq")); + println!("avx512vl {:?}", is_x86_feature_detected!("avx512vl")); + println!("avx512_ifma {:?}", is_x86_feature_detected!("avx512ifma")); + println!("avx512_vbmi {:?}", is_x86_feature_detected!("avx512vbmi")); + println!( + "avx512_vpopcntdq {:?}", + is_x86_feature_detected!("avx512vpopcntdq") + ); + println!("fma: {:?}", is_x86_feature_detected!("fma")); + println!("abm: {:?}", is_x86_feature_detected!("abm")); + println!("bmi: {:?}", is_x86_feature_detected!("bmi1")); + println!("bmi2: {:?}", is_x86_feature_detected!("bmi2")); + println!("tbm: {:?}", is_x86_feature_detected!("tbm")); + println!("popcnt: {:?}", is_x86_feature_detected!("popcnt")); + println!("lzcnt: {:?}", is_x86_feature_detected!("lzcnt")); + println!("fxsr: {:?}", is_x86_feature_detected!("fxsr")); + println!("xsave: {:?}", is_x86_feature_detected!("xsave")); + println!("xsaveopt: {:?}", is_x86_feature_detected!("xsaveopt")); + println!("xsaves: {:?}", is_x86_feature_detected!("xsaves")); + println!("xsavec: {:?}", is_x86_feature_detected!("xsavec")); + println!("cmpxchg16b: {:?}", is_x86_feature_detected!("cmpxchg16b")); + println!("adx: {:?}", is_x86_feature_detected!("adx")); + } + + #[test] + fn compare_with_cupid() { + let information = cupid::master().unwrap(); + assert_eq!(is_x86_feature_detected!("aes"), information.aesni()); + assert_eq!(is_x86_feature_detected!("pclmulqdq"), information.pclmulqdq()); + assert_eq!(is_x86_feature_detected!("rdrand"), information.rdrand()); + assert_eq!(is_x86_feature_detected!("rdseed"), information.rdseed()); + assert_eq!(is_x86_feature_detected!("tsc"), information.tsc()); + assert_eq!(is_x86_feature_detected!("sse"), information.sse()); + assert_eq!(is_x86_feature_detected!("sse2"), information.sse2()); + assert_eq!(is_x86_feature_detected!("sse3"), information.sse3()); + assert_eq!(is_x86_feature_detected!("ssse3"), information.ssse3()); + 
assert_eq!(is_x86_feature_detected!("sse4.1"), information.sse4_1()); + assert_eq!(is_x86_feature_detected!("sse4.2"), information.sse4_2()); + assert_eq!(is_x86_feature_detected!("sse4a"), information.sse4a()); + assert_eq!(is_x86_feature_detected!("sha"), information.sha()); + assert_eq!(is_x86_feature_detected!("avx"), information.avx()); + assert_eq!(is_x86_feature_detected!("avx2"), information.avx2()); + assert_eq!(is_x86_feature_detected!("avx512f"), information.avx512f()); + assert_eq!(is_x86_feature_detected!("avx512cd"), information.avx512cd()); + assert_eq!(is_x86_feature_detected!("avx512er"), information.avx512er()); + assert_eq!(is_x86_feature_detected!("avx512pf"), information.avx512pf()); + assert_eq!(is_x86_feature_detected!("avx512bw"), information.avx512bw()); + assert_eq!(is_x86_feature_detected!("avx512dq"), information.avx512dq()); + assert_eq!(is_x86_feature_detected!("avx512vl"), information.avx512vl()); + assert_eq!( + is_x86_feature_detected!("avx512ifma"), + information.avx512_ifma() + ); + assert_eq!( + is_x86_feature_detected!("avx512vbmi"), + information.avx512_vbmi() + ); + assert_eq!( + is_x86_feature_detected!("avx512vpopcntdq"), + information.avx512_vpopcntdq() + ); + assert_eq!(is_x86_feature_detected!("fma"), information.fma()); + assert_eq!(is_x86_feature_detected!("bmi1"), information.bmi1()); + assert_eq!(is_x86_feature_detected!("bmi2"), information.bmi2()); + assert_eq!(is_x86_feature_detected!("popcnt"), information.popcnt()); + assert_eq!(is_x86_feature_detected!("abm"), information.lzcnt()); + assert_eq!(is_x86_feature_detected!("tbm"), information.tbm()); + assert_eq!(is_x86_feature_detected!("lzcnt"), information.lzcnt()); + assert_eq!(is_x86_feature_detected!("xsave"), information.xsave()); + assert_eq!(is_x86_feature_detected!("xsaveopt"), information.xsaveopt()); + assert_eq!( + is_x86_feature_detected!("xsavec"), + information.xsavec_and_xrstor() + ); + assert_eq!( + is_x86_feature_detected!("xsaves"), + information.xsaves_xrstors_and_ia32_xss() + ); + assert_eq!( + is_x86_feature_detected!("cmpxchg16b"), + information.cmpxchg16b(), + ); + assert_eq!( + is_x86_feature_detected!("adx"), + information.adx(), + ); + } +} diff --git a/library/stdarch/crates/std_detect/src/detect/test_data/linux-rpi3.auxv b/library/stdarch/crates/std_detect/src/detect/test_data/linux-rpi3.auxv new file mode 100644 index 00000000000..0538e661f63 Binary files /dev/null and b/library/stdarch/crates/std_detect/src/detect/test_data/linux-rpi3.auxv differ diff --git a/library/stdarch/crates/std_detect/src/detect/test_data/linux-x64-i7-6850k.auxv b/library/stdarch/crates/std_detect/src/detect/test_data/linux-x64-i7-6850k.auxv new file mode 100644 index 00000000000..6afe1b3b46a Binary files /dev/null and b/library/stdarch/crates/std_detect/src/detect/test_data/linux-x64-i7-6850k.auxv differ diff --git a/library/stdarch/crates/std_detect/src/detect/test_data/macos-virtualbox-linux-x86-4850HQ.auxv b/library/stdarch/crates/std_detect/src/detect/test_data/macos-virtualbox-linux-x86-4850HQ.auxv new file mode 100644 index 00000000000..75abc02d178 Binary files /dev/null and b/library/stdarch/crates/std_detect/src/detect/test_data/macos-virtualbox-linux-x86-4850HQ.auxv differ diff --git a/library/stdarch/crates/std_detect/src/lib.rs b/library/stdarch/crates/std_detect/src/lib.rs new file mode 100644 index 00000000000..af7fc3bdc42 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/lib.rs @@ -0,0 +1,37 @@ +//! Run-time feature detection for the Rust standard library. +//! 
+//! To detect whether a feature is enabled in the system running the binary +//! use one of the appropriate macro for the target: +//! +//! * `x86` and `x86_64`: [`is_x86_feature_detected`] +//! * `arm`: [`is_arm_feature_detected`] +//! * `aarch64`: [`is_aarch64_feature_detected`] +//! * `mips`: [`is_mips_feature_detected`] +//! * `mips64`: [`is_mips64_feature_detected`] +//! * `powerpc`: [`is_powerpc_feature_detected`] +//! * `powerpc64`: [`is_powerpc64_feature_detected`] + +#![unstable(feature = "stdsimd", issue = "27731")] +#![feature(const_fn, integer_atomics, staged_api, stdsimd)] +#![feature(doc_cfg, allow_internal_unstable)] +#![cfg_attr(feature = "cargo-clippy", allow(clippy::shadow_reuse))] +#![cfg_attr( + feature = "cargo-clippy", + deny(clippy::missing_inline_in_public_items,) +)] +#![cfg_attr(target_os = "linux", feature(linkage))] +#![cfg_attr(all(target_os = "freebsd", target_arch = "aarch64"), feature(asm))] +#![no_std] + +#[cfg(test)] +#[macro_use(println)] +extern crate std; + +extern crate libc; + +#[macro_use] +extern crate cfg_if; + +#[doc(hidden)] +#[unstable(feature = "stdsimd", issue = "27731")] +pub mod detect; diff --git a/library/stdarch/crates/std_detect/src/mod.rs b/library/stdarch/crates/std_detect/src/mod.rs new file mode 100644 index 00000000000..b630e7ff383 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/mod.rs @@ -0,0 +1,5 @@ +//! `std_detect` + +#[doc(hidden)] // unstable implementation detail +#[unstable(feature = "stdsimd", issue = "27731")] +pub mod detect; -- cgit 1.4.1-3-g733a5 From 5f7006df5a945332e4f2a20a88b3ded8ce5c44fa Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Tue, 22 Jan 2019 18:48:36 +0100 Subject: Fix clippy issues --- library/stdarch/.travis.yml | 6 ++---- library/stdarch/crates/assert-instr-macro/src/lib.rs | 2 +- library/stdarch/crates/core_arch/src/simd.rs | 8 ++++---- library/stdarch/crates/core_arch/src/x86/adx.rs | 6 +++--- library/stdarch/crates/core_arch/src/x86/cpuid.rs | 8 ++++++-- library/stdarch/crates/core_arch/src/x86/rdrand.rs | 4 ++-- library/stdarch/crates/core_arch/src/x86/xsave.rs | 2 +- library/stdarch/crates/core_arch/src/x86_64/adx.rs | 6 +++--- library/stdarch/crates/core_arch/src/x86_64/bswap.rs | 2 +- library/stdarch/crates/core_arch/src/x86_64/cmpxchg16b.rs | 2 +- library/stdarch/crates/core_arch/src/x86_64/rdrand.rs | 3 ++- library/stdarch/crates/core_arch/src/x86_64/xsave.rs | 2 +- library/stdarch/crates/std_detect/src/detect/cache.rs | 6 +++++- library/stdarch/examples/hex.rs | 1 + 14 files changed, 33 insertions(+), 25 deletions(-) (limited to 'library/stdarch/crates/std_detect/src') diff --git a/library/stdarch/.travis.yml b/library/stdarch/.travis.yml index df49db3791f..dfb4d37db00 100644 --- a/library/stdarch/.travis.yml +++ b/library/stdarch/.travis.yml @@ -89,7 +89,7 @@ matrix: - name: "clippy" install: true script: | - if rustup component add clippy-preview; then + if rustup component add clippy-preview ; then cargo clippy --all -- -D clippy::pedantic fi - name: "Shellcheck" @@ -97,9 +97,7 @@ matrix: script: - shellcheck --version - shellcheck ci/*.sh - allow_failures: - - name: "clippy" - + install: travis_retry rustup target add $TARGET script: - cargo generate-lockfile diff --git a/library/stdarch/crates/assert-instr-macro/src/lib.rs b/library/stdarch/crates/assert-instr-macro/src/lib.rs index eba00e6b2b4..3a6e0dfb9ce 100644 --- a/library/stdarch/crates/assert-instr-macro/src/lib.rs +++ b/library/stdarch/crates/assert-instr-macro/src/lib.rs @@ -173,7 +173,7 @@ impl syn::parse::Parse for Invoc 
{ println!("{:?}", input.cursor().token_stream()); return Err(input.error("expected an instruction")); } - if instr.len() == 0 { + if instr.is_empty() { return Err(input.error("expected an instruction before comma")); } let mut args = Vec::new(); diff --git a/library/stdarch/crates/core_arch/src/simd.rs b/library/stdarch/crates/core_arch/src/simd.rs index 468b1e380bd..237a5735739 100644 --- a/library/stdarch/crates/core_arch/src/simd.rs +++ b/library/stdarch/crates/core_arch/src/simd.rs @@ -12,12 +12,12 @@ macro_rules! simd_ty { impl $id { #[inline] pub(crate) const fn new($($elem_name: $elem_ty),*) -> Self { - $id($($elem_name),*) + Self($($elem_name),*) } #[inline] pub(crate) const fn splat(value: $ety) -> Self { - $id($({ + Self($({ #[allow(non_camel_case_types, dead_code)] struct $elem_name; value @@ -48,12 +48,12 @@ macro_rules! simd_m_ty { #[inline] pub(crate) const fn new($($elem_name: bool),*) -> Self { - $id($(Self::bool_to_internal($elem_name)),*) + Self($(Self::bool_to_internal($elem_name)),*) } #[inline] pub(crate) const fn splat(value: bool) -> Self { - $id($({ + Self($({ #[allow(non_camel_case_types, dead_code)] struct $elem_name; Self::bool_to_internal(value) diff --git a/library/stdarch/crates/core_arch/src/x86/adx.rs b/library/stdarch/crates/core_arch/src/x86/adx.rs index c59743980f2..f130e7a41da 100644 --- a/library/stdarch/crates/core_arch/src/x86/adx.rs +++ b/library/stdarch/crates/core_arch/src/x86/adx.rs @@ -9,7 +9,7 @@ extern "unadjusted" { fn llvm_subborrow_u32(a: u8, b: u32, c: u32) -> (u8, u32); } -/// Add unsigned 32-bit integers a and b with unsigned 8-bit carry-in c_in +/// Add unsigned 32-bit integers a and b with unsigned 8-bit carry-in `c_in` /// (carry flag), and store the unsigned 32-bit result in out, and the carry-out /// is returned (carry or overflow flag). #[inline] @@ -21,7 +21,7 @@ pub unsafe fn _addcarry_u32(c_in: u8, a: u32, b: u32, out: &mut u32) -> u8 { a } -/// Add unsigned 32-bit integers a and b with unsigned 8-bit carry-in c_in +/// Add unsigned 32-bit integers a and b with unsigned 8-bit carry-in `c_in` /// (carry or overflow flag), and store the unsigned 32-bit result in out, and /// the carry-out is returned (carry or overflow flag). #[inline] @@ -33,7 +33,7 @@ pub unsafe fn _addcarryx_u32(c_in: u8, a: u32, b: u32, out: &mut u32) -> u8 { _addcarry_u32(c_in, a, b, out) } -/// Add unsigned 32-bit integers a and b with unsigned 8-bit carry-in c_in +/// Add unsigned 32-bit integers a and b with unsigned 8-bit carry-in `c_in` /// (carry or overflow flag), and store the unsigned 32-bit result in out, and /// the carry-out is returned (carry or overflow flag). #[inline] diff --git a/library/stdarch/crates/core_arch/src/x86/cpuid.rs b/library/stdarch/crates/core_arch/src/x86/cpuid.rs index adf3e127d64..d796995ad38 100644 --- a/library/stdarch/crates/core_arch/src/x86/cpuid.rs +++ b/library/stdarch/crates/core_arch/src/x86/cpuid.rs @@ -1,13 +1,17 @@ //! `cpuid` intrinsics -#![cfg_attr(feature = "cargo-clippy", allow(clippy::stutter))] +#![cfg_attr(feature = "cargo-clippy", allow(clippy::module_name_repetitions))] #[cfg(test)] use stdsimd_test::assert_instr; /// Result of the `cpuid` instruction. +#[cfg_attr( + feature = "cargo-clippy", + // the derived impl of Debug for CpuidResult is not #[inline] and that's OK. 
+ allow(clippy::missing_inline_in_public_items) +)] #[derive(Copy, Clone, Debug, Eq, Ord, PartialEq, PartialOrd)] -#[cfg_attr(feature = "cargo-clippy", allow(clippy::stutter))] #[stable(feature = "simd_x86", since = "1.27.0")] pub struct CpuidResult { /// EAX register. diff --git a/library/stdarch/crates/core_arch/src/x86/rdrand.rs b/library/stdarch/crates/core_arch/src/x86/rdrand.rs index 63573f689d6..90bb9454b0a 100644 --- a/library/stdarch/crates/core_arch/src/x86/rdrand.rs +++ b/library/stdarch/crates/core_arch/src/x86/rdrand.rs @@ -2,6 +2,8 @@ //! on-chip hardware random number generator which has been seeded by an //! on-chip entropy source. +#![cfg_attr(feature = "cargo-clippy", allow(clippy::module_name_repetitions))] + #[allow(improper_ctypes)] extern "unadjusted" { #[link_name = "llvm.x86.rdrand.16"] @@ -24,7 +26,6 @@ use stdsimd_test::assert_instr; #[inline] #[target_feature(enable = "rdrand")] #[cfg_attr(test, assert_instr(rdrand))] -#[cfg_attr(feature = "cargo-clippy", allow(clippy::stutter))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _rdrand16_step(val: &mut u16) -> i32 { let (v, flag) = x86_rdrand16_step(); @@ -39,7 +40,6 @@ pub unsafe fn _rdrand16_step(val: &mut u16) -> i32 { #[inline] #[target_feature(enable = "rdrand")] #[cfg_attr(test, assert_instr(rdrand))] -#[cfg_attr(feature = "cargo-clippy", allow(clippy::stutter))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _rdrand32_step(val: &mut u32) -> i32 { let (v, flag) = x86_rdrand32_step(); diff --git a/library/stdarch/crates/core_arch/src/x86/xsave.rs b/library/stdarch/crates/core_arch/src/x86/xsave.rs index c52dcd8c2a0..4c7f5338a96 100644 --- a/library/stdarch/crates/core_arch/src/x86/xsave.rs +++ b/library/stdarch/crates/core_arch/src/x86/xsave.rs @@ -1,6 +1,6 @@ //! `i586`'s `xsave` and `xsaveopt` target feature intrinsics -#![cfg_attr(feature = "cargo-clippy", allow(clippy::stutter))] +#![cfg_attr(feature = "cargo-clippy", allow(clippy::module_name_repetitions))] #[cfg(test)] use stdsimd_test::assert_instr; diff --git a/library/stdarch/crates/core_arch/src/x86_64/adx.rs b/library/stdarch/crates/core_arch/src/x86_64/adx.rs index 0343351b916..38a90047e40 100644 --- a/library/stdarch/crates/core_arch/src/x86_64/adx.rs +++ b/library/stdarch/crates/core_arch/src/x86_64/adx.rs @@ -9,7 +9,7 @@ extern "unadjusted" { fn llvm_subborrow_u64(a: u8, b: u64, c: u64) -> (u8, u64); } -/// Add unsigned 64-bit integers a and b with unsigned 8-bit carry-in c_in +/// Add unsigned 64-bit integers a and b with unsigned 8-bit carry-in `c_in` /// (carry flag), and store the unsigned 64-bit result in out, and the carry-out /// is returned (carry or overflow flag). #[inline] @@ -21,7 +21,7 @@ pub unsafe fn _addcarry_u64(c_in: u8, a: u64, b: u64, out: &mut u64) -> u8 { a } -/// Add unsigned 64-bit integers a and b with unsigned 8-bit carry-in c_in +/// Add unsigned 64-bit integers a and b with unsigned 8-bit carry-in `c_in` /// (carry or overflow flag), and store the unsigned 64-bit result in out, and /// the carry-out is returned (carry or overflow flag). #[inline] @@ -33,7 +33,7 @@ pub unsafe fn _addcarryx_u64(c_in: u8, a: u64, b: u64, out: &mut u64) -> u8 { _addcarry_u64(c_in, a, b, out) } -/// Add unsigned 64-bit integers a and b with unsigned 8-bit carry-in c_in +/// Add unsigned 64-bit integers a and b with unsigned 8-bit carry-in `c_in` /// (carry or overflow flag), and store the unsigned 64-bit result in out, and /// the carry-out is returned (carry or overflow flag). 
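// Editorial sketch, not part of this patch: the carry-in/carry-out shape of
// `_addcarry_u64` is what lets wide additions be chained limb by limb. A
// 128-bit addition over two 64-bit limbs looks like this (the function name
// and the (low, high, carry) return layout are illustrative only):
unsafe fn add_u128(a_lo: u64, a_hi: u64, b_lo: u64, b_hi: u64) -> (u64, u64, u8) {
    use core::arch::x86_64::_addcarry_u64;
    let mut lo = 0;
    let mut hi = 0;
    // Add the low limbs with no carry in, then feed the carry into the high limbs.
    let carry = _addcarry_u64(0, a_lo, b_lo, &mut lo);
    let carry = _addcarry_u64(carry, a_hi, b_hi, &mut hi);
    (lo, hi, carry)
}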
#[inline] diff --git a/library/stdarch/crates/core_arch/src/x86_64/bswap.rs b/library/stdarch/crates/core_arch/src/x86_64/bswap.rs index 75bb33c956c..ba121d67006 100644 --- a/library/stdarch/crates/core_arch/src/x86_64/bswap.rs +++ b/library/stdarch/crates/core_arch/src/x86_64/bswap.rs @@ -1,6 +1,6 @@ //! Byte swap intrinsics. -#![cfg_attr(feature = "cargo-clippy", allow(clippy::stutter))] +#![cfg_attr(feature = "cargo-clippy", allow(clippy::module_name_repetitions))] #[cfg(test)] use stdsimd_test::assert_instr; diff --git a/library/stdarch/crates/core_arch/src/x86_64/cmpxchg16b.rs b/library/stdarch/crates/core_arch/src/x86_64/cmpxchg16b.rs index c7b43a4469b..822bfc2fb3d 100644 --- a/library/stdarch/crates/core_arch/src/x86_64/cmpxchg16b.rs +++ b/library/stdarch/crates/core_arch/src/x86_64/cmpxchg16b.rs @@ -5,7 +5,7 @@ use stdsimd_test::assert_instr; /// Compare and exchange 16 bytes (128 bits) of data atomically. /// -/// This intrinsic corresponds to the `cmpxchg16b` instruction on x86_64 +/// This intrinsic corresponds to the `cmpxchg16b` instruction on `x86_64` /// processors. It performs an atomic compare-and-swap, updating the `ptr` /// memory location to `val` if the current value in memory equals `old`. /// diff --git a/library/stdarch/crates/core_arch/src/x86_64/rdrand.rs b/library/stdarch/crates/core_arch/src/x86_64/rdrand.rs index 7cc0d710c81..aef4f638ebe 100644 --- a/library/stdarch/crates/core_arch/src/x86_64/rdrand.rs +++ b/library/stdarch/crates/core_arch/src/x86_64/rdrand.rs @@ -2,6 +2,8 @@ //! on-chip hardware random number generator which has been seeded by an //! on-chip entropy source. +#![cfg_attr(feature = "cargo-clippy", allow(clippy::module_name_repetitions))] + #[allow(improper_ctypes)] extern "unadjusted" { #[link_name = "llvm.x86.rdrand.64"] @@ -20,7 +22,6 @@ use stdsimd_test::assert_instr; #[inline] #[target_feature(enable = "rdrand")] #[cfg_attr(test, assert_instr(rdrand))] -#[cfg_attr(feature = "cargo-clippy", allow(clippy::stutter))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _rdrand64_step(val: &mut u64) -> i32 { let (v, flag) = x86_rdrand64_step(); diff --git a/library/stdarch/crates/core_arch/src/x86_64/xsave.rs b/library/stdarch/crates/core_arch/src/x86_64/xsave.rs index 875b677dbd2..7531ac5832e 100644 --- a/library/stdarch/crates/core_arch/src/x86_64/xsave.rs +++ b/library/stdarch/crates/core_arch/src/x86_64/xsave.rs @@ -1,6 +1,6 @@ //! `x86_64`'s `xsave` and `xsaveopt` target feature intrinsics -#![cfg_attr(feature = "cargo-clippy", allow(clippy::stutter))] +#![cfg_attr(feature = "cargo-clippy", allow(clippy::module_name_repetitions))] #[cfg(test)] use stdsimd_test::assert_instr; diff --git a/library/stdarch/crates/std_detect/src/detect/cache.rs b/library/stdarch/crates/std_detect/src/detect/cache.rs index c2de4da7349..a0d009428c8 100644 --- a/library/stdarch/crates/std_detect/src/detect/cache.rs +++ b/library/stdarch/crates/std_detect/src/detect/cache.rs @@ -32,7 +32,7 @@ pub(crate) struct Initializer(u64); impl Default for Initializer { fn default() -> Self { - Initializer(0) + Self(0) } } @@ -77,6 +77,10 @@ struct Cache(AtomicU64); #[cfg(target_pointer_width = "64")] impl Cache { /// Creates an uninitialized cache. 
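// Editorial sketch, not part of this patch: a minimal standalone version of
// the memoization pattern behind `Cache` — detect once, stash the bitset in an
// atomic, and use an otherwise-impossible value as the "uninitialized"
// sentinel. The real cache also caps the number of features at
// `CACHE_CAPACITY` (63) and does more bookkeeping; this shows only the core idea.
use core::sync::atomic::{AtomicU64, Ordering};

static FEATURES: AtomicU64 = AtomicU64::new(u64::max_value());

fn feature_is_set(bit: u32, detect: fn() -> u64) -> bool {
    let mut bits = FEATURES.load(Ordering::Relaxed);
    if bits == u64::max_value() {
        // First call (or a harmless re-run if two threads race): compute and cache.
        bits = detect();
        FEATURES.store(bits, Ordering::Relaxed);
    }
    bits & (1 << bit) != 0
}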
+ #[cfg_attr( + feature = "cargo-clippy", + allow(clippy::declare_interior_mutable_const) + )] const fn uninitialized() -> Self { const X: AtomicU64 = AtomicU64::new(u64::max_value()); Self(X) diff --git a/library/stdarch/examples/hex.rs b/library/stdarch/examples/hex.rs index 37f2ce70160..9e6c84c11d2 100644 --- a/library/stdarch/examples/hex.rs +++ b/library/stdarch/examples/hex.rs @@ -22,6 +22,7 @@ clippy::option_unwrap_used, clippy::shadow_reuse, clippy::cast_possible_wrap, + clippy::cast_ptr_alignment, clippy::cast_sign_loss, clippy::missing_docs_in_private_items ) -- cgit 1.4.1-3-g733a5 From af7134fcf1e9eb4dbc380d8e1425de0af0ee12c3 Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Tue, 22 Jan 2019 19:50:20 +0100 Subject: Do not use Self constructors --- library/stdarch/crates/core_arch/src/simd.rs | 10 ++++++---- library/stdarch/crates/std_detect/src/detect/cache.rs | 6 ++++-- 2 files changed, 10 insertions(+), 6 deletions(-) (limited to 'library/stdarch/crates/std_detect/src') diff --git a/library/stdarch/crates/core_arch/src/simd.rs b/library/stdarch/crates/core_arch/src/simd.rs index 237a5735739..5c8425623d6 100644 --- a/library/stdarch/crates/core_arch/src/simd.rs +++ b/library/stdarch/crates/core_arch/src/simd.rs @@ -9,15 +9,16 @@ macro_rules! simd_ty { #[derive(Copy, Clone, Debug, PartialEq)] pub(crate) struct $id($(pub $elem_ty),*); + #[cfg_attr(feature = "cargo-clippy", allow(clippy::use_self))] impl $id { #[inline] pub(crate) const fn new($($elem_name: $elem_ty),*) -> Self { - Self($($elem_name),*) + $id($($elem_name),*) } #[inline] pub(crate) const fn splat(value: $ety) -> Self { - Self($({ + $id($({ #[allow(non_camel_case_types, dead_code)] struct $elem_name; value @@ -40,6 +41,7 @@ macro_rules! simd_m_ty { #[derive(Copy, Clone, Debug, PartialEq)] pub(crate) struct $id($(pub $elem_ty),*); + #[cfg_attr(feature = "cargo-clippy", allow(clippy::use_self))] impl $id { #[inline] const fn bool_to_internal(x: bool) -> $ety { @@ -48,12 +50,12 @@ macro_rules! simd_m_ty { #[inline] pub(crate) const fn new($($elem_name: bool),*) -> Self { - Self($(Self::bool_to_internal($elem_name)),*) + $id($(Self::bool_to_internal($elem_name)),*) } #[inline] pub(crate) const fn splat(value: bool) -> Self { - Self($({ + $id($({ #[allow(non_camel_case_types, dead_code)] struct $elem_name; Self::bool_to_internal(value) diff --git a/library/stdarch/crates/std_detect/src/detect/cache.rs b/library/stdarch/crates/std_detect/src/detect/cache.rs index a0d009428c8..944dccbe6df 100644 --- a/library/stdarch/crates/std_detect/src/detect/cache.rs +++ b/library/stdarch/crates/std_detect/src/detect/cache.rs @@ -30,9 +30,10 @@ const CACHE_CAPACITY: u32 = 63; #[derive(Copy, Clone)] pub(crate) struct Initializer(u64); +#[cfg_attr(feature = "cargo-clippy", allow(clippy::use_self))] impl Default for Initializer { fn default() -> Self { - Self(0) + Initializer(0) } } @@ -75,6 +76,7 @@ static CACHE: Cache = Cache::uninitialized(); struct Cache(AtomicU64); #[cfg(target_pointer_width = "64")] +#[cfg_attr(feature = "cargo-clippy", allow(clippy::use_self))] impl Cache { /// Creates an uninitialized cache. #[cfg_attr( @@ -83,7 +85,7 @@ impl Cache { )] const fn uninitialized() -> Self { const X: AtomicU64 = AtomicU64::new(u64::max_value()); - Self(X) + Cache(X) } /// Is the cache uninitialized? 
#[inline] -- cgit 1.4.1-3-g733a5 From e51ee17aa79ec4ccb2901ba128819bfccd5af651 Mon Sep 17 00:00:00 2001 From: Juan Aguilar Santillana Date: Sun, 3 Feb 2019 20:20:08 +0100 Subject: Add detect macros should support trailing commas (Fix #443) --- .../crates/std_detect/src/detect/arch/aarch64.rs | 3 ++ .../crates/std_detect/src/detect/arch/arm.rs | 3 ++ .../crates/std_detect/src/detect/arch/mips.rs | 3 ++ .../crates/std_detect/src/detect/arch/mips64.rs | 3 ++ .../crates/std_detect/src/detect/arch/powerpc.rs | 3 ++ .../crates/std_detect/src/detect/arch/powerpc64.rs | 3 ++ .../crates/std_detect/src/detect/arch/x86.rs | 3 ++ .../std_detect/tests/macro_trailing_commas.rs | 55 ++++++++++++++++++++++ 8 files changed, 76 insertions(+) create mode 100644 library/stdarch/crates/std_detect/tests/macro_trailing_commas.rs (limited to 'library/stdarch/crates/std_detect/src') diff --git a/library/stdarch/crates/std_detect/src/detect/arch/aarch64.rs b/library/stdarch/crates/std_detect/src/detect/arch/aarch64.rs index 882c22cc174..7571dfbfec3 100644 --- a/library/stdarch/crates/std_detect/src/detect/arch/aarch64.rs +++ b/library/stdarch/crates/std_detect/src/detect/arch/aarch64.rs @@ -66,6 +66,9 @@ macro_rules! is_aarch64_feature_detected { ("v8.3a") => { compile_error!("\"v8.3a\" feature cannot be detected at run-time") }; + ($t:tt,) => { + is_aarch64_feature_detected!($t); + }; ($t:tt) => { compile_error!(concat!("unknown aarch64 target feature: ", $t)) }; } diff --git a/library/stdarch/crates/std_detect/src/detect/arch/arm.rs b/library/stdarch/crates/std_detect/src/detect/arch/arm.rs index cb6ac6badcc..5f91c9269ad 100644 --- a/library/stdarch/crates/std_detect/src/detect/arch/arm.rs +++ b/library/stdarch/crates/std_detect/src/detect/arch/arm.rs @@ -17,6 +17,9 @@ macro_rules! is_arm_feature_detected { ("vfp2") => { compile_error!("\"vfp2\" feature cannot be detected at run-time") }; ("vfp3") => { compile_error!("\"vfp3\" feature cannot be detected at run-time") }; ("vfp4") => { compile_error!("\"vfp4\" feature cannot be detected at run-time") }; + ($t:tt,) => { + is_arm_feature_detected!($t); + }; ($t:tt) => { compile_error!(concat!("unknown arm target feature: ", $t)) }; } diff --git a/library/stdarch/crates/std_detect/src/detect/arch/mips.rs b/library/stdarch/crates/std_detect/src/detect/arch/mips.rs index 876f8dde262..58e3ee6d204 100644 --- a/library/stdarch/crates/std_detect/src/detect/arch/mips.rs +++ b/library/stdarch/crates/std_detect/src/detect/arch/mips.rs @@ -9,6 +9,9 @@ macro_rules! is_mips_feature_detected { cfg!(target_feature = "msa") || $crate::detect::check_for($crate::detect::Feature::msa) }; + ($t:tt,) => { + is_mips_feature_detected!($t); + }; ($t:tt) => { compile_error!(concat!("unknown mips target feature: ", $t)) }; } diff --git a/library/stdarch/crates/std_detect/src/detect/arch/mips64.rs b/library/stdarch/crates/std_detect/src/detect/arch/mips64.rs index ab837b3d5c9..aa42c0e55df 100644 --- a/library/stdarch/crates/std_detect/src/detect/arch/mips64.rs +++ b/library/stdarch/crates/std_detect/src/detect/arch/mips64.rs @@ -9,6 +9,9 @@ macro_rules! 
is_mips64_feature_detected { cfg!(target_feature = "msa") || $crate::detect::check_for($crate::detect::Feature::msa) }; + ($t:tt,) => { + is_mips64_feature_detected!($t); + }; ($t:tt) => { compile_error!(concat!("unknown mips64 target feature: ", $t)) }; } diff --git a/library/stdarch/crates/std_detect/src/detect/arch/powerpc.rs b/library/stdarch/crates/std_detect/src/detect/arch/powerpc.rs index 9c440b1d6b0..8270e5bee46 100644 --- a/library/stdarch/crates/std_detect/src/detect/arch/powerpc.rs +++ b/library/stdarch/crates/std_detect/src/detect/arch/powerpc.rs @@ -17,6 +17,9 @@ macro_rules! is_powerpc_feature_detected { cfg!(target_feature = "power8") || $crate::detect::check_for($crate::detect::Feature::power8) }; + ($t:tt,) => { + is_powerpc_feature_detected!($t); + }; ($t:tt) => { compile_error!(concat!("unknown powerpc target feature: ", $t)) }; } diff --git a/library/stdarch/crates/std_detect/src/detect/arch/powerpc64.rs b/library/stdarch/crates/std_detect/src/detect/arch/powerpc64.rs index 910940f0bb9..2f754713650 100644 --- a/library/stdarch/crates/std_detect/src/detect/arch/powerpc64.rs +++ b/library/stdarch/crates/std_detect/src/detect/arch/powerpc64.rs @@ -17,6 +17,9 @@ macro_rules! is_powerpc64_feature_detected { cfg!(target_feature = "power8") || $crate::detect::check_for($crate::detect::Feature::power8) }; + ($t:tt,) => { + is_powerpc64_feature_detected!($t); + }; ($t:tt) => { compile_error!(concat!("unknown powerpc64 target feature: ", $t)) }; } diff --git a/library/stdarch/crates/std_detect/src/detect/arch/x86.rs b/library/stdarch/crates/std_detect/src/detect/arch/x86.rs index 3ef8d31d12b..5425aeecd0c 100644 --- a/library/stdarch/crates/std_detect/src/detect/arch/x86.rs +++ b/library/stdarch/crates/std_detect/src/detect/arch/x86.rs @@ -234,6 +234,9 @@ macro_rules! 
is_x86_feature_detected { cfg!(target_feature = "adx") || $crate::detect::check_for( $crate::detect::Feature::adx) }; + ($t:tt,) => { + is_x86_feature_detected!($t); + }; ($t:tt) => { compile_error!(concat!("unknown target feature: ", $t)) }; diff --git a/library/stdarch/crates/std_detect/tests/macro_trailing_commas.rs b/library/stdarch/crates/std_detect/tests/macro_trailing_commas.rs new file mode 100644 index 00000000000..d63da6af06a --- /dev/null +++ b/library/stdarch/crates/std_detect/tests/macro_trailing_commas.rs @@ -0,0 +1,55 @@ +#![feature(stdsimd)] +#![cfg_attr(stdsimd_strict, deny(warnings))] +#![cfg_attr( + feature = "cargo-clippy", + allow(clippy::option_unwrap_used, clippy::use_debug, clippy::print_stdout) +)] + +#[cfg(any( + target_arch = "arm", + target_arch = "aarch64", + target_arch = "x86", + target_arch = "x86_64", + target_arch = "powerpc", + target_arch = "powerpc64" +))] +#[macro_use] +extern crate std_detect; + +#[test] +#[cfg(all(target_arch = "arm", any(target_os = "linux", target_os = "android")))] +fn arm_linux() { + let _ = is_arm_feature_detected!("neon"); + let _ = is_arm_feature_detected!("neon",); +} + +#[test] +#[cfg(all( + target_arch = "aarch64", + any(target_os = "linux", target_os = "android") +))] +fn aarch64_linux() { + let _ = is_aarch64_feature_detected!("fp"); + let _ = is_aarch64_feature_detected!("fp",); +} + +#[test] +#[cfg(all(target_arch = "powerpc", target_os = "linux"))] +fn powerpc_linux() { + let _ = is_powerpc_feature_detected!("altivec"); + let _ = is_powerpc_feature_detected!("altivec",); +} + +#[test] +#[cfg(all(target_arch = "powerpc64", target_os = "linux"))] +fn powerpc64_linux() { + let _ = is_powerpc64_feature_detected!("altivec"); + let _ = is_powerpc64_feature_detected!("altivec",); +} + +#[test] +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +fn x86_all() { + let _ = is_x86_feature_detected!("sse"); + let _ = is_x86_feature_detected!("sse",); +} -- cgit 1.4.1-3-g733a5 From eb13680d1aed60d8564f693aabb230ac189524e2 Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Tue, 5 Feb 2019 08:22:18 +0100 Subject: Remove const workaround in std_detect cache --- library/stdarch/crates/std_detect/src/detect/cache.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'library/stdarch/crates/std_detect/src') diff --git a/library/stdarch/crates/std_detect/src/detect/cache.rs b/library/stdarch/crates/std_detect/src/detect/cache.rs index 944dccbe6df..1ab94dd1cd9 100644 --- a/library/stdarch/crates/std_detect/src/detect/cache.rs +++ b/library/stdarch/crates/std_detect/src/detect/cache.rs @@ -84,8 +84,7 @@ impl Cache { allow(clippy::declare_interior_mutable_const) )] const fn uninitialized() -> Self { - const X: AtomicU64 = AtomicU64::new(u64::max_value()); - Cache(X) + Cache(AtomicU64::new(u64::max_value())) } /// Is the cache uninitialized? 
#[inline] -- cgit 1.4.1-3-g733a5 From ff129bff0534fe7cb69125ac5d6a8b6aeb5a6a60 Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Thu, 7 Feb 2019 22:56:23 +0100 Subject: Add cargo features to disable usage of file I/O and dlsym in std_detect --- library/stdarch/ci/run.sh | 27 ++++++--- library/stdarch/crates/std_detect/Cargo.toml | 9 ++- library/stdarch/crates/std_detect/README.md | 16 +++++ .../stdarch/crates/std_detect/src/detect/cache.rs | 6 +- .../std_detect/src/detect/os/linux/auxvec.rs | 69 +++++++++++++++++----- .../crates/std_detect/src/detect/os/linux/mod.rs | 2 + .../stdarch/crates/std_detect/src/detect/os/x86.rs | 7 ++- library/stdarch/crates/std_detect/src/lib.rs | 29 ++++++--- 8 files changed, 125 insertions(+), 40 deletions(-) (limited to 'library/stdarch/crates/std_detect/src') diff --git a/library/stdarch/ci/run.sh b/library/stdarch/ci/run.sh index 2d33060f801..db08da4645f 100755 --- a/library/stdarch/ci/run.sh +++ b/library/stdarch/ci/run.sh @@ -58,12 +58,6 @@ cargo_test() { fi fi cmd="$cmd ${subcmd} --target=$TARGET $1" - if [ "$NOSTD" = "1" ] - then - cmd="$cmd -p core_arch" - else - cmd="$cmd -p core_arch -p std_detect -p stdsimd_examples" - fi cmd="$cmd -- $2" # Un-commenting this disables the test output and shows only a summary: #if [ "$NORUN" != "1" ] @@ -88,8 +82,25 @@ cargo_output() { } cargo_setup -cargo_test -cargo_test "--release" + +CORE_ARCH="--manifest-path=crates/core_arch/Cargo.toml" +STD_DETECT="--manifest-path=crates/std_detect/Cargo.toml" +STDSIMD_EXAMPLES="--manifest-path=examples/Cargo.toml" +cargo_test "${CORE_ARCH}" +cargo_test "${CORE_ARCH} --release" +if [ "$NOSTD" != "1" ]; then + cargo_test "${STD_DETECT}" + cargo_test "${STD_DETECT} --release" + + cargo_test "${STD_DETECT} --no-default-features" + cargo_test "${STD_DETECT} --no-default-features --features=std_detect_file_io" + cargo_test "${STD_DETECT} --no-default-features --features=std_detect_dlsym_getauxval" + cargo_test "${STD_DETECT} --no-default-features --features=std_detect_dlsym_getauxval,std_detect_file_io" + + cargo_test "${STDSIMD_EXAMPLES}" + cargo_test "${STDSIMD_EXAMPLES} --release" +fi + cargo_output # Test targets compiled with extra features. diff --git a/library/stdarch/crates/std_detect/Cargo.toml b/library/stdarch/crates/std_detect/Cargo.toml index 47ec3e99708..eee52fb5135 100644 --- a/library/stdarch/crates/std_detect/Cargo.toml +++ b/library/stdarch/crates/std_detect/Cargo.toml @@ -23,9 +23,14 @@ is-it-maintained-open-issues = { repository = "rust-lang-nursery/stdsimd" } maintenance = { status = "experimental" } [dependencies] -libc = "0.2" +libc = { version = "0.2", optional = true, default-features = false } cfg-if = "0.1" [dev-dependencies] auxv = "0.3.3" -cupid = "0.6.0" \ No newline at end of file +cupid = "0.6.0" + +[features] +default = [ "std_detect_dlsym_getauxval", "std_detect_file_io" ] +std_detect_file_io = [] +std_detect_dlsym_getauxval = [ "libc" ] \ No newline at end of file diff --git a/library/stdarch/crates/std_detect/README.md b/library/stdarch/crates/std_detect/README.md index 6a8143cb20b..4d2ec7d3448 100644 --- a/library/stdarch/crates/std_detect/README.md +++ b/library/stdarch/crates/std_detect/README.md @@ -24,6 +24,22 @@ run-time feature detection support than the one offered by Rust's standard library. We intend to make `std_detect` more flexible and configurable in this regard to better serve the needs of `#[no_std]` targets. 
+# Features + +* `std_detect_dlsym_getauxval` (enabled by default, requires `libc`): Enable to +use `libc::dlsym` to query whether [`getauxval`] is linked into the binary. When +this is not the case, this feature allows other fallback methods to perform +run-time feature detection. When this feature is disabled, `std_detect` assumes +that [`getauxval`] is linked to the binary. If that is not the case the behavior +is undefined. + +* `std_detect_file_io` (enabled by default, requires `std`): Enable to perform run-time feature +detection using file APIs (e.g. `/proc/cpuinfo`, etc.) if other more performant +methods fail. This feature requires `libstd` as a dependency, preventing the +crate from working on applications in which `std` is not available. + +[`getauxval`]: http://man7.org/linux/man-pages/man3/getauxval.3.html + # Platform support * All `x86`/`x86_64` targets are supported on all platforms by querying the diff --git a/library/stdarch/crates/std_detect/src/detect/cache.rs b/library/stdarch/crates/std_detect/src/detect/cache.rs index 1ab94dd1cd9..ee1914875cf 100644 --- a/library/stdarch/crates/std_detect/src/detect/cache.rs +++ b/library/stdarch/crates/std_detect/src/detect/cache.rs @@ -3,13 +3,13 @@ #![allow(dead_code)] // not used on all platforms -use core::sync::atomic::Ordering; +use sync::atomic::Ordering; #[cfg(target_pointer_width = "64")] -use core::sync::atomic::AtomicU64; +use sync::atomic::AtomicU64; #[cfg(target_pointer_width = "32")] -use core::sync::atomic::AtomicU32; +use sync::atomic::AtomicU32; /// Sets the `bit` of `x`. #[inline] diff --git a/library/stdarch/crates/std_detect/src/detect/os/linux/auxvec.rs b/library/stdarch/crates/std_detect/src/detect/os/linux/auxvec.rs index 31c980fd382..6ccdbbc88a8 100644 --- a/library/stdarch/crates/std_detect/src/detect/os/linux/auxvec.rs +++ b/library/stdarch/crates/std_detect/src/detect/os/linux/auxvec.rs @@ -1,10 +1,8 @@ //! Parses ELF auxiliary vectors. #![cfg_attr(not(target_arch = "aarch64"), allow(dead_code))] -extern crate std; -use self::std::{prelude::v1::*, fs::File, io::Read}; - -use core::mem; +#[cfg(feature = "std_detect_file_io")] +use ::{fs::File, io::Read}; /// Key to access the CPU Hardware capabilities bitfield. pub(crate) const AT_HWCAP: usize = 16; @@ -34,8 +32,12 @@ pub(crate) struct AuxVec { /// /// There is no perfect way of reading the auxiliary vector. /// -/// - If the `getauxval` is dynamically linked to this binary, it will be used. -/// - Otherwise, try to read `/proc/self/auxv`. +/// - If the `std_detect_dlsym_getauxval` cargo feature is enabled, this will use +/// `getauxval` if its linked to the binary, and otherwise proceed to a fallback implementation. +/// When `std_detect_dlsym_getauxval` is disabled, this will assume that `getauxval` is +/// linked to the binary - if that is not the case the behavior is undefined. +/// - Otherwise, if the `std_detect_file_io` cargo feature is enabled, it will +/// try to read `/proc/self/auxv`. /// - If that fails, this function returns an error. /// /// Note that run-time feature detection is not invoked for features that can @@ -49,8 +51,42 @@ pub(crate) struct AuxVec { /// [auxvec_h]: https://github.com/torvalds/linux/blob/master/include/uapi/linux/auxvec.h /// [auxv_docs]: https://docs.rs/auxv/0.3.3/auxv/ pub(crate) fn auxv() -> Result { - // Try to call a dynamically-linked getauxval function. - if let Ok(hwcap) = getauxval(AT_HWCAP) { + #[cfg(feature = "std_detect_dlsym_getauxval")] { + // Try to call a dynamically-linked getauxval function. 
+ if let Ok(hwcap) = getauxval(AT_HWCAP) { + // Targets with only AT_HWCAP: + #[cfg(any(target_arch = "aarch64", target_arch = "mips", + target_arch = "mips64"))] + { + if hwcap != 0 { + return Ok(AuxVec { hwcap }); + } + } + + // Targets with AT_HWCAP and AT_HWCAP2: + #[cfg(any(target_arch = "arm", target_arch = "powerpc64"))] + { + if let Ok(hwcap2) = getauxval(AT_HWCAP2) { + if hwcap != 0 && hwcap2 != 0 { + return Ok(AuxVec { hwcap, hwcap2 }); + } + } + } + drop(hwcap); + } + #[cfg(feature = "std_detect_file_io")] { + // If calling getauxval fails, try to read the auxiliary vector from + // its file: + auxv_from_file("/proc/self/auxv") + } + #[cfg(not(feature = "std_detect_file_io"))] { + Err(()) + } + } + + #[cfg(not(feature = "std_detect_dlsym_getauxval"))] { + let hwcap = unsafe { ffi_getauxval(AT_HWCAP) }; + // Targets with only AT_HWCAP: #[cfg(any(target_arch = "aarch64", target_arch = "mips", target_arch = "mips64"))] @@ -63,22 +99,18 @@ pub(crate) fn auxv() -> Result { // Targets with AT_HWCAP and AT_HWCAP2: #[cfg(any(target_arch = "arm", target_arch = "powerpc64"))] { - if let Ok(hwcap2) = getauxval(AT_HWCAP2) { - if hwcap != 0 && hwcap2 != 0 { - return Ok(AuxVec { hwcap, hwcap2 }); - } + let hwcap2 = unsafe { ffi_getauxval(AT_HWCAP2) }; + if hwcap != 0 && hwcap2 != 0 { + return Ok(AuxVec { hwcap, hwcap2 }); } } - drop(hwcap); } - // If calling getauxval fails, try to read the auxiliary vector from - // its file: - auxv_from_file("/proc/self/auxv") } /// Tries to read the `key` from the auxiliary vector by calling the /// dynamically-linked `getauxval` function. If the function is not linked, /// this function return `Err`. +#[cfg(feature = "std_detect_dlsym_getauxval")] fn getauxval(key: usize) -> Result { use libc; pub type F = unsafe extern "C" fn(usize) -> usize; @@ -98,6 +130,7 @@ fn getauxval(key: usize) -> Result { /// Tries to read the auxiliary vector from the `file`. If this fails, this /// function returns `Err`. +#[cfg(feature = "std_detect_file_io")] fn auxv_from_file(file: &str) -> Result { let mut file = File::open(file).map_err(|_| ())?; @@ -117,6 +150,7 @@ fn auxv_from_file(file: &str) -> Result { /// Tries to interpret the `buffer` as an auxiliary vector. If that fails, this /// function returns `Err`. +#[cfg(feature = "std_detect_file_io")] fn auxv_from_buf(buf: &[usize; 64]) -> Result { // Targets with only AT_HWCAP: #[cfg(any(target_arch = "aarch64", target_arch = "mips", @@ -157,6 +191,7 @@ mod tests { // Reads the Auxiliary Vector key from /proc/self/auxv // using the auxv crate. + #[cfg(feature = "std_detect_file_io")] fn auxv_crate_getprocfs(key: usize) -> Option { use self::auxv_crate::AuxvType; use self::auxv_crate::procfs::search_procfs_auxv; @@ -210,6 +245,7 @@ mod tests { } } + #[cfg(feature = "std_detect_file_io")] cfg_if! { if #[cfg(target_arch = "arm")] { #[test] @@ -244,6 +280,7 @@ mod tests { } #[test] + #[cfg(feature = "std_detect_file_io")] fn auxv_dump_procfs() { if let Ok(auxvec) = auxv_from_file("/proc/self/auxv") { println!("{:?}", auxvec); diff --git a/library/stdarch/crates/std_detect/src/detect/os/linux/mod.rs b/library/stdarch/crates/std_detect/src/detect/os/linux/mod.rs index 642dfb46571..e02d5e6dcda 100644 --- a/library/stdarch/crates/std_detect/src/detect/os/linux/mod.rs +++ b/library/stdarch/crates/std_detect/src/detect/os/linux/mod.rs @@ -1,6 +1,8 @@ //! Run-time feature detection on Linux mod auxvec; + +#[cfg(feature = "std_detect_file_io")] mod cpuinfo; cfg_if! 
{ diff --git a/library/stdarch/crates/std_detect/src/detect/os/x86.rs b/library/stdarch/crates/std_detect/src/detect/os/x86.rs index 9237d5dc0a5..30199b1f44f 100644 --- a/library/stdarch/crates/std_detect/src/detect/os/x86.rs +++ b/library/stdarch/crates/std_detect/src/detect/os/x86.rs @@ -1,10 +1,11 @@ //! x86 run-time feature detection is OS independent. -use core::{prelude::v1::*, mem}; #[cfg(target_arch = "x86")] -use core::arch::x86::*; +use arch::x86::*; #[cfg(target_arch = "x86_64")] -use core::arch::x86_64::*; +use arch::x86_64::*; + +use mem; use crate::detect::{Feature, cache, bit}; diff --git a/library/stdarch/crates/std_detect/src/lib.rs b/library/stdarch/crates/std_detect/src/lib.rs index af7fc3bdc42..f5278f964a9 100644 --- a/library/stdarch/crates/std_detect/src/lib.rs +++ b/library/stdarch/crates/std_detect/src/lib.rs @@ -12,8 +12,7 @@ //! * `powerpc64`: [`is_powerpc64_feature_detected`] #![unstable(feature = "stdsimd", issue = "27731")] -#![feature(const_fn, integer_atomics, staged_api, stdsimd)] -#![feature(doc_cfg, allow_internal_unstable)] +#![feature(const_fn, staged_api, stdsimd, doc_cfg, allow_internal_unstable)] #![cfg_attr(feature = "cargo-clippy", allow(clippy::shadow_reuse))] #![cfg_attr( feature = "cargo-clippy", @@ -23,15 +22,29 @@ #![cfg_attr(all(target_os = "freebsd", target_arch = "aarch64"), feature(asm))] #![no_std] -#[cfg(test)] -#[macro_use(println)] -extern crate std; - -extern crate libc; - #[macro_use] extern crate cfg_if; +cfg_if! { + if #[cfg(feature = "std_detect_file_io")] { + #[cfg_attr(test, macro_use(println))] + extern crate std; + + #[allow(unused_imports)] + use std::{arch, fs, io, mem, sync}; + } else { + #[cfg(test)] + #[macro_use(println)] + extern crate std; + + #[allow(unused_imports)] + use core::{arch, mem, sync}; + } +} + +#[cfg(feature = "std_detect_dlsym_getauxval")] +extern crate libc; + #[doc(hidden)] #[unstable(feature = "stdsimd", issue = "27731")] pub mod detect; -- cgit 1.4.1-3-g733a5 From fe06593c6fcbacc92e99fcd94a3b8aba5a2ffb37 Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Wed, 13 Feb 2019 16:06:45 +0100 Subject: allow_internal_unstable requires feature names Closes #681 . --- library/stdarch/crates/std_detect/src/detect/arch/aarch64.rs | 2 +- library/stdarch/crates/std_detect/src/detect/arch/arm.rs | 2 +- library/stdarch/crates/std_detect/src/detect/arch/mips.rs | 2 +- library/stdarch/crates/std_detect/src/detect/arch/mips64.rs | 2 +- library/stdarch/crates/std_detect/src/detect/arch/powerpc.rs | 2 +- library/stdarch/crates/std_detect/src/detect/arch/powerpc64.rs | 2 +- library/stdarch/crates/std_detect/src/detect/arch/x86.rs | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) (limited to 'library/stdarch/crates/std_detect/src') diff --git a/library/stdarch/crates/std_detect/src/detect/arch/aarch64.rs b/library/stdarch/crates/std_detect/src/detect/arch/aarch64.rs index 7571dfbfec3..3270641eb34 100644 --- a/library/stdarch/crates/std_detect/src/detect/arch/aarch64.rs +++ b/library/stdarch/crates/std_detect/src/detect/arch/aarch64.rs @@ -3,7 +3,7 @@ /// Checks if `aarch64` feature is enabled. #[macro_export] #[unstable(feature = "stdsimd", issue = "27731")] -#[allow_internal_unstable] +#[allow_internal_unstable(stdsimd_internal)] macro_rules! 
is_aarch64_feature_detected { ("neon") => { // FIXME: this should be removed once we rename Aarch64 neon to asimd diff --git a/library/stdarch/crates/std_detect/src/detect/arch/arm.rs b/library/stdarch/crates/std_detect/src/detect/arch/arm.rs index 5f91c9269ad..ebed1f757e3 100644 --- a/library/stdarch/crates/std_detect/src/detect/arch/arm.rs +++ b/library/stdarch/crates/std_detect/src/detect/arch/arm.rs @@ -3,7 +3,7 @@ /// Checks if `arm` feature is enabled. #[macro_export] #[unstable(feature = "stdsimd", issue = "27731")] -#[allow_internal_unstable] +#[allow_internal_unstable(stdsimd_internal)] macro_rules! is_arm_feature_detected { ("neon") => { cfg!(target_feature = "neon") || diff --git a/library/stdarch/crates/std_detect/src/detect/arch/mips.rs b/library/stdarch/crates/std_detect/src/detect/arch/mips.rs index 58e3ee6d204..969d6d05dc5 100644 --- a/library/stdarch/crates/std_detect/src/detect/arch/mips.rs +++ b/library/stdarch/crates/std_detect/src/detect/arch/mips.rs @@ -3,7 +3,7 @@ /// Checks if `mips` feature is enabled. #[macro_export] #[unstable(feature = "stdsimd", issue = "27731")] -#[allow_internal_unstable] +#[allow_internal_unstable(stdsimd_internal)] macro_rules! is_mips_feature_detected { ("msa") => { cfg!(target_feature = "msa") || diff --git a/library/stdarch/crates/std_detect/src/detect/arch/mips64.rs b/library/stdarch/crates/std_detect/src/detect/arch/mips64.rs index aa42c0e55df..d421595c9cd 100644 --- a/library/stdarch/crates/std_detect/src/detect/arch/mips64.rs +++ b/library/stdarch/crates/std_detect/src/detect/arch/mips64.rs @@ -3,7 +3,7 @@ /// Checks if `mips64` feature is enabled. #[macro_export] #[unstable(feature = "stdsimd", issue = "27731")] -#[allow_internal_unstable] +#[allow_internal_unstable(stdsimd_internal)] macro_rules! is_mips64_feature_detected { ("msa") => { cfg!(target_feature = "msa") || diff --git a/library/stdarch/crates/std_detect/src/detect/arch/powerpc.rs b/library/stdarch/crates/std_detect/src/detect/arch/powerpc.rs index 8270e5bee46..9d65437e0ba 100644 --- a/library/stdarch/crates/std_detect/src/detect/arch/powerpc.rs +++ b/library/stdarch/crates/std_detect/src/detect/arch/powerpc.rs @@ -3,7 +3,7 @@ /// Checks if `powerpc` feature is enabled. #[macro_export] #[unstable(feature = "stdsimd", issue = "27731")] -#[allow_internal_unstable] +#[allow_internal_unstable(stdsimd_internal)] macro_rules! is_powerpc_feature_detected { ("altivec") => { cfg!(target_feature = "altivec") || diff --git a/library/stdarch/crates/std_detect/src/detect/arch/powerpc64.rs b/library/stdarch/crates/std_detect/src/detect/arch/powerpc64.rs index 2f754713650..4514100907c 100644 --- a/library/stdarch/crates/std_detect/src/detect/arch/powerpc64.rs +++ b/library/stdarch/crates/std_detect/src/detect/arch/powerpc64.rs @@ -3,7 +3,7 @@ /// Checks if `powerpc64` feature is enabled. #[macro_export] #[unstable(feature = "stdsimd", issue = "27731")] -#[allow_internal_unstable] +#[allow_internal_unstable(stdsimd_internal)] macro_rules! 
is_powerpc64_feature_detected { ("altivec") => { cfg!(target_feature = "altivec") || diff --git a/library/stdarch/crates/std_detect/src/detect/arch/x86.rs b/library/stdarch/crates/std_detect/src/detect/arch/x86.rs index 5425aeecd0c..953bf29d680 100644 --- a/library/stdarch/crates/std_detect/src/detect/arch/x86.rs +++ b/library/stdarch/crates/std_detect/src/detect/arch/x86.rs @@ -79,7 +79,7 @@ /// [docs]: https://software.intel.com/sites/landingpage/IntrinsicsGuide #[macro_export] #[stable(feature = "simd_x86", since = "1.27.0")] -#[allow_internal_unstable] +#[allow_internal_unstable(stdsimd_internal)] macro_rules! is_x86_feature_detected { ("aes") => { cfg!(target_feature = "aes") || $crate::detect::check_for( -- cgit 1.4.1-3-g733a5 From e56de7344f45a3488589ff56da4d38c042b7d7ce Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Wed, 13 Feb 2019 22:05:49 +0100 Subject: Fix wasm32 build job --- library/stdarch/ci/run.sh | 2 +- library/stdarch/crates/core_arch/Cargo.toml | 4 ---- library/stdarch/crates/std_detect/src/lib.rs | 2 ++ 3 files changed, 3 insertions(+), 5 deletions(-) (limited to 'library/stdarch/crates/std_detect/src') diff --git a/library/stdarch/ci/run.sh b/library/stdarch/ci/run.sh index 0d45feb846c..bcf9f389090 100755 --- a/library/stdarch/ci/run.sh +++ b/library/stdarch/ci/run.sh @@ -80,7 +80,7 @@ case ${TARGET} in esac -if [ "$NORUN" != "1" ] && [ "$NOSTD" != 1 ]; then +if [ "$NORUN" != "1" ] && [ "$NOSTD" != 1 ] && [ "$TARGET" != "wasm32-unknown-unknown" ]; then # Test examples ( cd examples diff --git a/library/stdarch/crates/core_arch/Cargo.toml b/library/stdarch/crates/core_arch/Cargo.toml index 56140e0c349..e2e6d62a5e0 100644 --- a/library/stdarch/crates/core_arch/Cargo.toml +++ b/library/stdarch/crates/core_arch/Cargo.toml @@ -30,9 +30,5 @@ std_detect = { version = "0.*", path = "../std_detect" } [target.wasm32-unknown-unknown.dev-dependencies] wasm-bindgen-test = "=0.2.19" -[features] -# Internal-usage only: denies all warnings. 
-strict = [] - [package.metadata.docs.rs] rustdoc-args = [ "--cfg", "dox" ] \ No newline at end of file diff --git a/library/stdarch/crates/std_detect/src/lib.rs b/library/stdarch/crates/std_detect/src/lib.rs index f5278f964a9..da54cc73592 100644 --- a/library/stdarch/crates/std_detect/src/lib.rs +++ b/library/stdarch/crates/std_detect/src/lib.rs @@ -20,6 +20,8 @@ )] #![cfg_attr(target_os = "linux", feature(linkage))] #![cfg_attr(all(target_os = "freebsd", target_arch = "aarch64"), feature(asm))] +#![cfg_attr(stdsimd_strict, deny(warnings))] +#![cfg_attr(test, allow(unused_imports))] #![no_std] #[macro_use] -- cgit 1.4.1-3-g733a5 From 049071b50f327d28145356f4ddf89470e3b244f7 Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Thu, 14 Feb 2019 21:52:41 +0100 Subject: Try to fix upstream --- library/stdarch/crates/std_detect/src/detect/arch/aarch64.rs | 2 +- library/stdarch/crates/std_detect/src/detect/arch/arm.rs | 2 +- library/stdarch/crates/std_detect/src/detect/arch/mips.rs | 2 +- library/stdarch/crates/std_detect/src/detect/arch/mips64.rs | 2 +- library/stdarch/crates/std_detect/src/detect/arch/powerpc.rs | 2 +- library/stdarch/crates/std_detect/src/detect/arch/powerpc64.rs | 2 +- library/stdarch/crates/std_detect/src/detect/arch/x86.rs | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) (limited to 'library/stdarch/crates/std_detect/src') diff --git a/library/stdarch/crates/std_detect/src/detect/arch/aarch64.rs b/library/stdarch/crates/std_detect/src/detect/arch/aarch64.rs index 3270641eb34..ebae2bd2854 100644 --- a/library/stdarch/crates/std_detect/src/detect/arch/aarch64.rs +++ b/library/stdarch/crates/std_detect/src/detect/arch/aarch64.rs @@ -3,7 +3,7 @@ /// Checks if `aarch64` feature is enabled. #[macro_export] #[unstable(feature = "stdsimd", issue = "27731")] -#[allow_internal_unstable(stdsimd_internal)] +#[allow_internal_unstable(stdsimd_internal,stdsimd)] macro_rules! is_aarch64_feature_detected { ("neon") => { // FIXME: this should be removed once we rename Aarch64 neon to asimd diff --git a/library/stdarch/crates/std_detect/src/detect/arch/arm.rs b/library/stdarch/crates/std_detect/src/detect/arch/arm.rs index ebed1f757e3..b2626bf2923 100644 --- a/library/stdarch/crates/std_detect/src/detect/arch/arm.rs +++ b/library/stdarch/crates/std_detect/src/detect/arch/arm.rs @@ -3,7 +3,7 @@ /// Checks if `arm` feature is enabled. #[macro_export] #[unstable(feature = "stdsimd", issue = "27731")] -#[allow_internal_unstable(stdsimd_internal)] +#[allow_internal_unstable(stdsimd_internal,stdsimd)] macro_rules! is_arm_feature_detected { ("neon") => { cfg!(target_feature = "neon") || diff --git a/library/stdarch/crates/std_detect/src/detect/arch/mips.rs b/library/stdarch/crates/std_detect/src/detect/arch/mips.rs index 969d6d05dc5..f4381b811cd 100644 --- a/library/stdarch/crates/std_detect/src/detect/arch/mips.rs +++ b/library/stdarch/crates/std_detect/src/detect/arch/mips.rs @@ -3,7 +3,7 @@ /// Checks if `mips` feature is enabled. #[macro_export] #[unstable(feature = "stdsimd", issue = "27731")] -#[allow_internal_unstable(stdsimd_internal)] +#[allow_internal_unstable(stdsimd_internal,stdsimd)] macro_rules! 
is_mips_feature_detected { ("msa") => { cfg!(target_feature = "msa") || diff --git a/library/stdarch/crates/std_detect/src/detect/arch/mips64.rs b/library/stdarch/crates/std_detect/src/detect/arch/mips64.rs index d421595c9cd..2663bc68ba9 100644 --- a/library/stdarch/crates/std_detect/src/detect/arch/mips64.rs +++ b/library/stdarch/crates/std_detect/src/detect/arch/mips64.rs @@ -3,7 +3,7 @@ /// Checks if `mips64` feature is enabled. #[macro_export] #[unstable(feature = "stdsimd", issue = "27731")] -#[allow_internal_unstable(stdsimd_internal)] +#[allow_internal_unstable(stdsimd_internal,stdsimd)] macro_rules! is_mips64_feature_detected { ("msa") => { cfg!(target_feature = "msa") || diff --git a/library/stdarch/crates/std_detect/src/detect/arch/powerpc.rs b/library/stdarch/crates/std_detect/src/detect/arch/powerpc.rs index 9d65437e0ba..a342dc1aacc 100644 --- a/library/stdarch/crates/std_detect/src/detect/arch/powerpc.rs +++ b/library/stdarch/crates/std_detect/src/detect/arch/powerpc.rs @@ -3,7 +3,7 @@ /// Checks if `powerpc` feature is enabled. #[macro_export] #[unstable(feature = "stdsimd", issue = "27731")] -#[allow_internal_unstable(stdsimd_internal)] +#[allow_internal_unstable(stdsimd_internal,stdsimd)] macro_rules! is_powerpc_feature_detected { ("altivec") => { cfg!(target_feature = "altivec") || diff --git a/library/stdarch/crates/std_detect/src/detect/arch/powerpc64.rs b/library/stdarch/crates/std_detect/src/detect/arch/powerpc64.rs index 4514100907c..2e82c569252 100644 --- a/library/stdarch/crates/std_detect/src/detect/arch/powerpc64.rs +++ b/library/stdarch/crates/std_detect/src/detect/arch/powerpc64.rs @@ -3,7 +3,7 @@ /// Checks if `powerpc64` feature is enabled. #[macro_export] #[unstable(feature = "stdsimd", issue = "27731")] -#[allow_internal_unstable(stdsimd_internal)] +#[allow_internal_unstable(stdsimd_internal,stdsimd)] macro_rules! is_powerpc64_feature_detected { ("altivec") => { cfg!(target_feature = "altivec") || diff --git a/library/stdarch/crates/std_detect/src/detect/arch/x86.rs b/library/stdarch/crates/std_detect/src/detect/arch/x86.rs index 953bf29d680..6a3e11de3df 100644 --- a/library/stdarch/crates/std_detect/src/detect/arch/x86.rs +++ b/library/stdarch/crates/std_detect/src/detect/arch/x86.rs @@ -79,7 +79,7 @@ /// [docs]: https://software.intel.com/sites/landingpage/IntrinsicsGuide #[macro_export] #[stable(feature = "simd_x86", since = "1.27.0")] -#[allow_internal_unstable(stdsimd_internal)] +#[allow_internal_unstable(stdsimd_internal,stdsimd)] macro_rules! is_x86_feature_detected { ("aes") => { cfg!(target_feature = "aes") || $crate::detect::check_for( -- cgit 1.4.1-3-g733a5 From a16a30d796f6753992ef1e5c877e5d456a799d19 Mon Sep 17 00:00:00 2001 From: Alexander Regueiro Date: Thu, 31 Jan 2019 00:36:36 +0000 Subject: Various cosmetic improvements. 
--- .../stdarch/crates/core_arch/src/wasm32/simd128.rs | 79 ++--- library/stdarch/crates/core_arch/src/x86/adx.rs | 12 +- library/stdarch/crates/core_arch/src/x86/aes.rs | 10 +- library/stdarch/crates/core_arch/src/x86/avx.rs | 356 ++++++++++--------- library/stdarch/crates/core_arch/src/x86/avx2.rs | 384 ++++++++++----------- .../stdarch/crates/core_arch/src/x86/avx512f.rs | 8 +- library/stdarch/crates/core_arch/src/x86/bmi1.rs | 4 +- library/stdarch/crates/core_arch/src/x86/bmi2.rs | 2 +- library/stdarch/crates/core_arch/src/x86/bswap.rs | 2 +- library/stdarch/crates/core_arch/src/x86/fma.rs | 68 ++-- library/stdarch/crates/core_arch/src/x86/mmx.rs | 52 +-- .../stdarch/crates/core_arch/src/x86/pclmulqdq.rs | 2 +- library/stdarch/crates/core_arch/src/x86/rdrand.rs | 4 +- library/stdarch/crates/core_arch/src/x86/sha.rs | 12 +- library/stdarch/crates/core_arch/src/x86/sse.rs | 172 ++++----- library/stdarch/crates/core_arch/src/x86/sse2.rs | 370 ++++++++++---------- library/stdarch/crates/core_arch/src/x86/sse3.rs | 10 +- library/stdarch/crates/core_arch/src/x86/sse41.rs | 72 ++-- library/stdarch/crates/core_arch/src/x86/sse42.rs | 38 +- library/stdarch/crates/core_arch/src/x86/ssse3.rs | 60 ++-- library/stdarch/crates/core_arch/src/x86/xsave.rs | 14 +- library/stdarch/crates/core_arch/src/x86_64/adx.rs | 12 +- library/stdarch/crates/core_arch/src/x86_64/avx.rs | 2 +- .../stdarch/crates/core_arch/src/x86_64/avx2.rs | 2 +- library/stdarch/crates/core_arch/src/x86_64/bmi.rs | 4 +- .../stdarch/crates/core_arch/src/x86_64/bmi2.rs | 2 +- .../stdarch/crates/core_arch/src/x86_64/bswap.rs | 2 +- .../crates/core_arch/src/x86_64/cmpxchg16b.rs | 2 +- .../stdarch/crates/core_arch/src/x86_64/rdrand.rs | 2 +- library/stdarch/crates/core_arch/src/x86_64/sse.rs | 6 +- .../stdarch/crates/core_arch/src/x86_64/sse2.rs | 16 +- .../stdarch/crates/core_arch/src/x86_64/sse41.rs | 4 +- .../stdarch/crates/core_arch/src/x86_64/xsave.rs | 12 +- .../crates/std_detect/src/detect/arch/x86.rs | 7 +- .../stdarch/crates/std_detect/src/detect/cache.rs | 2 +- .../std_detect/src/detect/os/linux/auxvec.rs | 2 +- library/stdarch/crates/stdsimd-test/src/lib.rs | 29 +- 37 files changed, 916 insertions(+), 921 deletions(-) (limited to 'library/stdarch/crates/std_detect/src') diff --git a/library/stdarch/crates/core_arch/src/wasm32/simd128.rs b/library/stdarch/crates/core_arch/src/wasm32/simd128.rs index 591d8062ca5..b742f12e185 100644 --- a/library/stdarch/crates/core_arch/src/wasm32/simd128.rs +++ b/library/stdarch/crates/core_arch/src/wasm32/simd128.rs @@ -18,8 +18,9 @@ use stdsimd_test::assert_instr; use wasm_bindgen_test::wasm_bindgen_test; types! { - /// WASM-specific 128-bit wide SIMD vector type - pub struct v128(i32, i32, i32, i32); // NB: internals here are arbitrary + /// WASM-specific 128-bit wide SIMD vector type. + // N.B., internals here are arbitrary. + pub struct v128(i32, i32, i32, i32); } #[allow(non_camel_case_types)] @@ -144,21 +145,21 @@ extern "C" { fn llvm_bitselect(a: i8x16, b: i8x16, c: i8x16) -> i8x16; } -/// Load a `v128` vector from the given heap address. +/// Loads a `v128` vector from the given heap address. #[inline] #[cfg_attr(test, assert_instr(v128.load))] pub unsafe fn v128_load(m: *const v128) -> v128 { ptr::read(m) } -/// Store a `v128` vector to the given heap address. +/// Stores a `v128` vector to the given heap address. 
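Since the hunk above defines the `v128` load/store intrinsics, a tiny usage sketch may help. It assumes the `core::arch::wasm32` module path and the signatures shown in this diff (the API was unstable at the time), and it is only meaningful on a wasm32 target with SIMD enabled.

    // Round-trip a `v128` value through linear memory with the intrinsics
    // defined above (illustrative sketch, not part of the patch).
    #[cfg(target_arch = "wasm32")]
    mod v128_example {
        use core::arch::wasm32::{v128, v128_load, v128_store};

        /// Copies one 128-bit SIMD slot to another.
        pub unsafe fn copy(src: *const v128, dst: *mut v128) {
            let value = v128_load(src); // lowers to `v128.load`
            v128_store(dst, value);     // lowers to `v128.store`
        }
    }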
#[inline] #[cfg_attr(test, assert_instr(v128.store))] pub unsafe fn v128_store(m: *mut v128, a: v128) { ptr::write(m, a) } -/// Materialize a constant SIMD value from the immediate operands. +/// Materializes a constant SIMD value from the immediate operands. /// /// The `v128.const` instruction is encoded with 16 immediate bytes /// `imm` which provide the bits of the vector directly. @@ -215,18 +216,18 @@ pub const fn v128_const( } } -/// Create vector with identical lanes +/// Creates a vector with identical lanes. /// -/// Construct a vector with `x` replicated to all 16 lanes. +/// Constructs a vector with `x` replicated to all 16 lanes. #[inline] #[cfg_attr(test, assert_instr(i8x16.splat))] pub fn i8x16_splat(a: i8) -> v128 { unsafe { mem::transmute(i8x16::splat(a)) } } -/// Extract lane from a 128-bit vector interpreted as 16 packed i8 numbers. +/// Extracts a lane from a 128-bit vector interpreted as 16 packed i8 numbers. /// -/// Extract the scalar value of lane specified in the immediate mode operand +/// Extracts the scalar value of lane specified in the immediate mode operand /// `imm` from `a`. /// /// # Unsafety @@ -249,9 +250,9 @@ pub unsafe fn i8x16_extract_lane(a: v128, imm: usize) -> i8 { simd_extract(a.as_i8x16(), imm as u32) } -/// Replace a lane from a 128-bit vector interpreted as 16 packed i8 numbers. +/// Replaces a lane from a 128-bit vector interpreted as 16 packed i8 numbers. /// -/// Replace the scalar value of lane specified in the immediate mode operand +/// Replaces the scalar value of lane specified in the immediate mode operand /// `imm` with `a`. /// /// # Unsafety @@ -265,7 +266,7 @@ pub unsafe fn i8x16_replace_lane(a: v128, imm: usize, val: i8) -> v128 { mem::transmute(simd_insert(a.as_i8x16(), imm as u32, val)) } -/// Create vector with identical lanes +/// Creates a vector with identical lanes. /// /// Construct a vector with `x` replicated to all 8 lanes. #[inline] @@ -274,9 +275,9 @@ pub fn i16x8_splat(a: i16) -> v128 { unsafe { mem::transmute(i16x8::splat(a)) } } -/// Extract lane from a 128-bit vector interpreted as 8 packed i16 numbers. +/// Extracts a lane from a 128-bit vector interpreted as 8 packed i16 numbers. /// -/// Extract the scalar value of lane specified in the immediate mode operand +/// Extracts a the scalar value of lane specified in the immediate mode operand /// `imm` from `a`. /// /// # Unsafety @@ -299,9 +300,9 @@ pub unsafe fn i16x8_extract_lane(a: v128, imm: usize) -> i16 { simd_extract(a.as_i16x8(), imm as u32) } -/// Replace a lane from a 128-bit vector interpreted as 8 packed i16 numbers. +/// Replaces a lane from a 128-bit vector interpreted as 8 packed i16 numbers. /// -/// Replace the scalar value of lane specified in the immediate mode operand +/// Replaces the scalar value of lane specified in the immediate mode operand /// `imm` with `a`. /// /// # Unsafety @@ -315,18 +316,18 @@ pub unsafe fn i16x8_replace_lane(a: v128, imm: usize, val: i16) -> v128 { mem::transmute(simd_insert(a.as_i16x8(), imm as u32, val)) } -/// Create vector with identical lanes +/// Creates a vector with identical lanes. /// -/// Construct a vector with `x` replicated to all 4 lanes. +/// Constructs a vector with `x` replicated to all 4 lanes. #[inline] #[cfg_attr(test, assert_instr(i8x16.splat))] pub fn i32x4_splat(a: i32) -> v128 { unsafe { mem::transmute(i32x4::splat(a)) } } -/// Extract lane from a 128-bit vector interpreted as 4 packed i32 numbers. +/// Extracts a lane from a 128-bit vector interpreted as 4 packed i32 numbers. 
/// -/// Extract the scalar value of lane specified in the immediate mode operand +/// Extracts the scalar value of lane specified in the immediate mode operand /// `imm` from `a`. /// /// # Unsafety @@ -340,9 +341,9 @@ pub unsafe fn i32x4_extract_lane(a: v128, imm: usize) -> i32 { simd_extract(a.as_i32x4(), imm as u32) } -/// Replace a lane from a 128-bit vector interpreted as 4 packed i32 numbers. +/// Replaces a lane from a 128-bit vector interpreted as 4 packed i32 numbers. /// -/// Replace the scalar value of lane specified in the immediate mode operand +/// Replaces the scalar value of lane specified in the immediate mode operand /// `imm` with `a`. /// /// # Unsafety @@ -356,7 +357,7 @@ pub unsafe fn i32x4_replace_lane(a: v128, imm: usize, val: i32) -> v128 { mem::transmute(simd_insert(a.as_i32x4(), imm as u32, val)) } -/// Create vector with identical lanes +/// Creates a vector with identical lanes. /// /// Construct a vector with `x` replicated to all 2 lanes. #[inline] @@ -365,9 +366,9 @@ pub fn i64x2_splat(a: i64) -> v128 { unsafe { mem::transmute(i64x2::splat(a)) } } -/// Extract lane from a 128-bit vector interpreted as 2 packed i64 numbers. +/// Extracts a lane from a 128-bit vector interpreted as 2 packed i64 numbers. /// -/// Extract the scalar value of lane specified in the immediate mode operand +/// Extracts the scalar value of lane specified in the immediate mode operand /// `imm` from `a`. /// /// # Unsafety @@ -381,9 +382,9 @@ pub unsafe fn i64x2_extract_lane(a: v128, imm: usize) -> i64 { simd_extract(a.as_i64x2(), imm as u32) } -/// Replace a lane from a 128-bit vector interpreted as 2 packed i64 numbers. +/// Replaces a lane from a 128-bit vector interpreted as 2 packed i64 numbers. /// -/// Replace the scalar value of lane specified in the immediate mode operand +/// Replaces the scalar value of lane specified in the immediate mode operand /// `imm` with `a`. /// /// # Unsafety @@ -397,18 +398,18 @@ pub unsafe fn i64x2_replace_lane(a: v128, imm: usize, val: i64) -> v128 { mem::transmute(simd_insert(a.as_i64x2(), imm as u32, val)) } -/// Create vector with identical lanes +/// Creates a vector with identical lanes. /// -/// Construct a vector with `x` replicated to all 4 lanes. +/// Constructs a vector with `x` replicated to all 4 lanes. #[inline] #[cfg_attr(test, assert_instr(i8x16.splat))] pub fn f32x4_splat(a: f32) -> v128 { unsafe { mem::transmute(f32x4::splat(a)) } } -/// Extract lane from a 128-bit vector interpreted as 4 packed f32 numbers. +/// Extracts a lane from a 128-bit vector interpreted as 4 packed f32 numbers. /// -/// Extract the scalar value of lane specified in the immediate mode operand +/// Extracts the scalar value of lane specified in the immediate mode operand /// `imm` from `a`. /// /// # Unsafety @@ -422,9 +423,9 @@ pub unsafe fn f32x4_extract_lane(a: v128, imm: usize) -> f32 { simd_extract(a.as_f32x4(), imm as u32) } -/// Replace a lane from a 128-bit vector interpreted as 4 packed f32 numbers. +/// Replaces a lane from a 128-bit vector interpreted as 4 packed f32 numbers. /// -/// Replace the scalar value of lane specified in the immediate mode operand +/// Replaces the scalar value of lane specified in the immediate mode operand /// `imm` with `a`. /// /// # Unsafety @@ -438,18 +439,18 @@ pub unsafe fn f32x4_replace_lane(a: v128, imm: usize, val: f32) -> v128 { mem::transmute(simd_insert(a.as_f32x4(), imm as u32, val)) } -/// Create vector with identical lanes +/// Creates a vector with identical lanes. 
/// -/// Construct a vector with `x` replicated to all 2 lanes. +/// Constructs a vector with `x` replicated to all 2 lanes. #[inline] #[cfg_attr(test, assert_instr(i8x16.splat))] pub fn f64x2_splat(a: f64) -> v128 { unsafe { mem::transmute(f64x2::splat(a)) } } -/// Extract lane from a 128-bit vector interpreted as 2 packed f64 numbers. +/// Extracts lane from a 128-bit vector interpreted as 2 packed f64 numbers. /// -/// Extract the scalar value of lane specified in the immediate mode operand +/// Extracts the scalar value of lane specified in the immediate mode operand /// `imm` from `a`. /// /// # Unsafety @@ -463,9 +464,9 @@ pub unsafe fn f64x2_extract_lane(a: v128, imm: usize) -> f64 { simd_extract(a.as_f64x2(), imm as u32) } -/// Replace a lane from a 128-bit vector interpreted as 2 packed f64 numbers. +/// Replaces a lane from a 128-bit vector interpreted as 2 packed f64 numbers. /// -/// Replace the scalar value of lane specified in the immediate mode operand +/// Replaces the scalar value of lane specified in the immediate mode operand /// `imm` with `a`. /// /// # Unsafety diff --git a/library/stdarch/crates/core_arch/src/x86/adx.rs b/library/stdarch/crates/core_arch/src/x86/adx.rs index 8d4c0508388..5800d27c18c 100644 --- a/library/stdarch/crates/core_arch/src/x86/adx.rs +++ b/library/stdarch/crates/core_arch/src/x86/adx.rs @@ -11,8 +11,8 @@ extern "unadjusted" { fn llvm_subborrow_u32(a: u8, b: u32, c: u32) -> (u8, u32); } -/// Add unsigned 32-bit integers a and b with unsigned 8-bit carry-in `c_in` -/// (carry flag), and store the unsigned 32-bit result in out, and the carry-out +/// Adds unsigned 32-bit integers `a` and `b` with unsigned 8-bit carry-in `c_in` +/// (carry flag), and store the unsigned 32-bit result in `out`, and the carry-out /// is returned (carry or overflow flag). #[inline] #[cfg_attr(test, assert_instr(adc))] @@ -23,8 +23,8 @@ pub unsafe fn _addcarry_u32(c_in: u8, a: u32, b: u32, out: &mut u32) -> u8 { a } -/// Add unsigned 32-bit integers a and b with unsigned 8-bit carry-in `c_in` -/// (carry or overflow flag), and store the unsigned 32-bit result in out, and +/// Adds unsigned 32-bit integers `a` and `b` with unsigned 8-bit carry-in `c_in` +/// (carry or overflow flag), and store the unsigned 32-bit result in `out`, and /// the carry-out is returned (carry or overflow flag). #[inline] #[target_feature(enable = "adx")] @@ -36,8 +36,8 @@ pub unsafe fn _addcarryx_u32(c_in: u8, a: u32, b: u32, out: &mut u32) -> u8 { r } -/// Add unsigned 32-bit integers a and b with unsigned 8-bit carry-in `c_in` -/// (carry or overflow flag), and store the unsigned 32-bit result in out, and +/// Adds unsigned 32-bit integers `a` and `b` with unsigned 8-bit carry-in `c_in` +/// (carry or overflow flag), and store the unsigned 32-bit result in `out`, and /// the carry-out is returned (carry or overflow flag). #[inline] #[cfg_attr(test, assert_instr(sbb))] diff --git a/library/stdarch/crates/core_arch/src/x86/aes.rs b/library/stdarch/crates/core_arch/src/x86/aes.rs index c90bb122d5a..2fc6dc67455 100644 --- a/library/stdarch/crates/core_arch/src/x86/aes.rs +++ b/library/stdarch/crates/core_arch/src/x86/aes.rs @@ -28,7 +28,7 @@ extern "C" { fn aeskeygenassist(a: __m128i, imm8: u8) -> __m128i; } -/// Perform one round of an AES decryption flow on data (state) in `a`. +/// Performs one round of an AES decryption flow on data (state) in `a`. 
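The adx.rs hunk above documents the carry-propagating adds. As a worked illustration (not from this patch), chaining `_addcarry_u32` adds two 64-bit values held as 32-bit halves, feeding the carry out of the low half into the carry in of the high half.

    // Illustrative sketch: 64-bit addition from two 32-bit limbs using the
    // carry chain described above. `a` and `b` are (low, high) pairs.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    mod carry_example {
        #[cfg(target_arch = "x86")]
        use core::arch::x86::*;
        #[cfg(target_arch = "x86_64")]
        use core::arch::x86_64::*;

        pub unsafe fn add64(a: (u32, u32), b: (u32, u32)) -> (u32, u32, u8) {
            let (mut lo, mut hi) = (0u32, 0u32);
            let carry = _addcarry_u32(0, a.0, b.0, &mut lo);     // low halves
            let carry = _addcarry_u32(carry, a.1, b.1, &mut hi); // high + carry
            (lo, hi, carry) // a final carry of 1 means the sum overflowed 64 bits
        }
    }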
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_aesdec_si128) #[inline] @@ -39,7 +39,7 @@ pub unsafe fn _mm_aesdec_si128(a: __m128i, round_key: __m128i) -> __m128i { aesdec(a, round_key) } -/// Perform the last round of an AES decryption flow on data (state) in `a`. +/// Performs the last round of an AES decryption flow on data (state) in `a`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_aesdeclast_si128) #[inline] @@ -50,7 +50,7 @@ pub unsafe fn _mm_aesdeclast_si128(a: __m128i, round_key: __m128i) -> __m128i { aesdeclast(a, round_key) } -/// Perform one round of an AES encryption flow on data (state) in `a`. +/// Performs one round of an AES encryption flow on data (state) in `a`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_aesenc_si128) #[inline] @@ -61,7 +61,7 @@ pub unsafe fn _mm_aesenc_si128(a: __m128i, round_key: __m128i) -> __m128i { aesenc(a, round_key) } -/// Perform the last round of an AES encryption flow on data (state) in `a`. +/// Performs the last round of an AES encryption flow on data (state) in `a`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_aesenclast_si128) #[inline] @@ -72,7 +72,7 @@ pub unsafe fn _mm_aesenclast_si128(a: __m128i, round_key: __m128i) -> __m128i { aesenclast(a, round_key) } -/// Perform the `InvMixColumns` transformation on `a`. +/// Performs the `InvMixColumns` transformation on `a`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_aesimc_si128) #[inline] diff --git a/library/stdarch/crates/core_arch/src/x86/avx.rs b/library/stdarch/crates/core_arch/src/x86/avx.rs index a26eb8f12b9..28e53c9e8bc 100644 --- a/library/stdarch/crates/core_arch/src/x86/avx.rs +++ b/library/stdarch/crates/core_arch/src/x86/avx.rs @@ -23,7 +23,7 @@ use ptr; #[cfg(test)] use stdsimd_test::assert_instr; -/// Add packed double-precision (64-bit) floating-point elements +/// Adds packed double-precision (64-bit) floating-point elements /// in `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_add_pd) @@ -35,7 +35,7 @@ pub unsafe fn _mm256_add_pd(a: __m256d, b: __m256d) -> __m256d { simd_add(a, b) } -/// Add packed single-precision (32-bit) floating-point elements in `a` and +/// Adds packed single-precision (32-bit) floating-point elements in `a` and /// `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_add_ps) @@ -47,9 +47,8 @@ pub unsafe fn _mm256_add_ps(a: __m256, b: __m256) -> __m256 { simd_add(a, b) } -/// Compute the bitwise AND of a packed double-precision (64-bit) -/// floating-point elements -/// in `a` and `b`. +/// Computes the bitwise AND of a packed double-precision (64-bit) +/// floating-point elements in `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_and_pd) #[inline] @@ -64,7 +63,7 @@ pub unsafe fn _mm256_and_pd(a: __m256d, b: __m256d) -> __m256d { mem::transmute(simd_and(a, b)) } -/// Compute the bitwise AND of packed single-precision (32-bit) floating-point +/// Computes the bitwise AND of packed single-precision (32-bit) floating-point /// elements in `a` and `b`. 
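Putting the aes.rs intrinsics above to work: a sketch of one AES-128 block encryption, assuming the 11-entry round-key schedule has already been expanded (key expansion via `_mm_aeskeygenassist_si128` is omitted). This is an illustration, not code from the patch.

    // Illustrative sketch: encrypt one block with AES-NI given an expanded
    // AES-128 key schedule (11 round keys).
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    mod aes_example {
        #[cfg(target_arch = "x86")]
        use core::arch::x86::*;
        #[cfg(target_arch = "x86_64")]
        use core::arch::x86_64::*;

        #[target_feature(enable = "aes")]
        pub unsafe fn encrypt_block(block: __m128i, keys: &[__m128i; 11]) -> __m128i {
            // Initial AddRoundKey, nine full rounds, then the final round.
            let mut state = _mm_xor_si128(block, keys[0]);
            for rk in &keys[1..10] {
                state = _mm_aesenc_si128(state, *rk);
            }
            _mm_aesenclast_si128(state, keys[10])
        }
    }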
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_and_ps) @@ -78,14 +77,14 @@ pub unsafe fn _mm256_and_ps(a: __m256, b: __m256) -> __m256 { mem::transmute(simd_and(a, b)) } -/// Compute the bitwise OR packed double-precision (64-bit) floating-point +/// Computes the bitwise OR packed double-precision (64-bit) floating-point /// elements in `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_or_pd) #[inline] #[target_feature(enable = "avx")] -// FIXME: Should be 'vorpd' instuction. -// See https://github.com/rust-lang-nursery/stdsimd/issues/71 +// FIXME: should be `vorpd` instuction. +// See . #[cfg_attr(test, assert_instr(vorps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_or_pd(a: __m256d, b: __m256d) -> __m256d { @@ -94,7 +93,7 @@ pub unsafe fn _mm256_or_pd(a: __m256d, b: __m256d) -> __m256d { mem::transmute(simd_or(a, b)) } -/// Compute the bitwise OR packed single-precision (32-bit) floating-point +/// Computes the bitwise OR packed single-precision (32-bit) floating-point /// elements in `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_or_ps) @@ -108,7 +107,7 @@ pub unsafe fn _mm256_or_ps(a: __m256, b: __m256) -> __m256 { mem::transmute(simd_or(a, b)) } -/// Shuffle double-precision (64-bit) floating-point elements within 128-bit +/// Shuffles double-precision (64-bit) floating-point elements within 128-bit /// lanes using the control in `imm8`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_shuffle_pd) @@ -154,7 +153,7 @@ pub unsafe fn _mm256_shuffle_pd(a: __m256d, b: __m256d, imm8: i32) -> __m256d { } } -/// Shuffle single-precision (32-bit) floating-point elements in `a` within +/// Shuffles single-precision (32-bit) floating-point elements in `a` within /// 128-bit lanes using the control in `imm8`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_shuffle_ps) @@ -217,14 +216,13 @@ pub unsafe fn _mm256_shuffle_ps(a: __m256, b: __m256, imm8: i32) -> __m256 { } } -/// Compute the bitwise NOT of packed double-precision (64-bit) floating-point -/// elements in `a` -/// and then AND with `b`. +/// Computes the bitwise NOT of packed double-precision (64-bit) floating-point +/// elements in `a`, and then AND with `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_andnot_pd) #[inline] #[target_feature(enable = "avx")] -// FIXME: Should be 'vandnpd' instruction. +// FIXME: should be `vandnpd` instruction. #[cfg_attr(test, assert_instr(vandnps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_andnot_pd(a: __m256d, b: __m256d) -> __m256d { @@ -233,7 +231,7 @@ pub unsafe fn _mm256_andnot_pd(a: __m256d, b: __m256d) -> __m256d { mem::transmute(simd_and(simd_xor(u64x4::splat(!(0_u64)), a), b)) } -/// Compute the bitwise NOT of packed single-precision (32-bit) floating-point +/// Computes the bitwise NOT of packed single-precision (32-bit) floating-point /// elements in `a` /// and then AND with `b`. 
/// @@ -248,8 +246,8 @@ pub unsafe fn _mm256_andnot_ps(a: __m256, b: __m256) -> __m256 { mem::transmute(simd_and(simd_xor(u32x8::splat(!(0_u32)), a), b)) } -/// Compare packed double-precision (64-bit) floating-point elements -/// in `a` and `b`, and return packed maximum values +/// Compares packed double-precision (64-bit) floating-point elements +/// in `a` and `b`, and returns packed maximum values /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_max_pd) #[inline] @@ -260,8 +258,8 @@ pub unsafe fn _mm256_max_pd(a: __m256d, b: __m256d) -> __m256d { maxpd256(a, b) } -/// Compare packed single-precision (32-bit) floating-point elements in `a` -/// and `b`, and return packed maximum values +/// Compares packed single-precision (32-bit) floating-point elements in `a` +/// and `b`, and returns packed maximum values /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_max_ps) #[inline] @@ -272,8 +270,8 @@ pub unsafe fn _mm256_max_ps(a: __m256, b: __m256) -> __m256 { maxps256(a, b) } -/// Compare packed double-precision (64-bit) floating-point elements -/// in `a` and `b`, and return packed minimum values +/// Compares packed double-precision (64-bit) floating-point elements +/// in `a` and `b`, and returns packed minimum values /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_min_pd) #[inline] @@ -284,8 +282,8 @@ pub unsafe fn _mm256_min_pd(a: __m256d, b: __m256d) -> __m256d { minpd256(a, b) } -/// Compare packed single-precision (32-bit) floating-point elements in `a` -/// and `b`, and return packed minimum values +/// Compares packed single-precision (32-bit) floating-point elements in `a` +/// and `b`, and returns packed minimum values /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_min_ps) #[inline] @@ -296,7 +294,7 @@ pub unsafe fn _mm256_min_ps(a: __m256, b: __m256) -> __m256 { minps256(a, b) } -/// Multiply packed double-precision (64-bit) floating-point elements +/// Multiplies packed double-precision (64-bit) floating-point elements /// in `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mul_pd) @@ -308,7 +306,7 @@ pub unsafe fn _mm256_mul_pd(a: __m256d, b: __m256d) -> __m256d { simd_mul(a, b) } -/// Multiply packed single-precision (32-bit) floating-point elements in `a` and +/// Multiplies packed single-precision (32-bit) floating-point elements in `a` and /// `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mul_ps) @@ -320,7 +318,7 @@ pub unsafe fn _mm256_mul_ps(a: __m256, b: __m256) -> __m256 { simd_mul(a, b) } -/// Alternatively add and subtract packed double-precision (64-bit) +/// Alternatively adds and subtracts packed double-precision (64-bit) /// floating-point elements in `a` to/from packed elements in `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_addsub_pd) @@ -332,7 +330,7 @@ pub unsafe fn _mm256_addsub_pd(a: __m256d, b: __m256d) -> __m256d { addsubpd256(a, b) } -/// Alternatively add and subtract packed single-precision (32-bit) +/// Alternatively adds and subtracts packed single-precision (32-bit) /// floating-point elements in `a` to/from packed elements in `b`. 
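The AVX arithmetic intrinsics above operate lane-wise on 256-bit vectors. A short sketch (not from the patch) of adding eight `f32`s at a time; `_mm256_loadu_ps` and `_mm256_storeu_ps` are the usual unaligned load/store intrinsics, which this excerpt does not show.

    // Illustrative sketch: lane-wise addition of two [f32; 8] arrays with AVX.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    mod avx_add_example {
        #[cfg(target_arch = "x86")]
        use core::arch::x86::*;
        #[cfg(target_arch = "x86_64")]
        use core::arch::x86_64::*;

        #[target_feature(enable = "avx")]
        pub unsafe fn add8(a: &[f32; 8], b: &[f32; 8]) -> [f32; 8] {
            let va = _mm256_loadu_ps(a.as_ptr());
            let vb = _mm256_loadu_ps(b.as_ptr());
            let sum = _mm256_add_ps(va, vb); // a[i] + b[i] for each lane
            let mut out = [0.0f32; 8];
            _mm256_storeu_ps(out.as_mut_ptr(), sum);
            out
        }
    }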
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_addsub_ps) @@ -344,7 +342,7 @@ pub unsafe fn _mm256_addsub_ps(a: __m256, b: __m256) -> __m256 { addsubps256(a, b) } -/// Subtract packed double-precision (64-bit) floating-point elements in `b` +/// Subtracts packed double-precision (64-bit) floating-point elements in `b` /// from packed elements in `a`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sub_pd) @@ -356,7 +354,7 @@ pub unsafe fn _mm256_sub_pd(a: __m256d, b: __m256d) -> __m256d { simd_sub(a, b) } -/// Subtract packed single-precision (32-bit) floating-point elements in `b` +/// Subtracts packed single-precision (32-bit) floating-point elements in `b` /// from packed elements in `a`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sub_ps) @@ -368,7 +366,7 @@ pub unsafe fn _mm256_sub_ps(a: __m256, b: __m256) -> __m256 { simd_sub(a, b) } -/// Compute the division of each of the 8 packed 32-bit floating-point elements +/// Computes the division of each of the 8 packed 32-bit floating-point elements /// in `a` by the corresponding packed elements in `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_div_ps) @@ -380,7 +378,7 @@ pub unsafe fn _mm256_div_ps(a: __m256, b: __m256) -> __m256 { simd_div(a, b) } -/// Compute the division of each of the 4 packed 64-bit floating-point elements +/// Computes the division of each of the 4 packed 64-bit floating-point elements /// in `a` by the corresponding packed elements in `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_div_pd) @@ -392,7 +390,7 @@ pub unsafe fn _mm256_div_pd(a: __m256d, b: __m256d) -> __m256d { simd_div(a, b) } -/// Round packed double-precision (64-bit) floating point elements in `a` +/// Rounds packed double-precision (64-bit) floating point elements in `a` /// according to the flag `b`. The value of `b` may be as follows: /// /// - `0x00`: Round to the nearest whole number. @@ -419,7 +417,7 @@ pub unsafe fn _mm256_round_pd(a: __m256d, b: i32) -> __m256d { constify_imm8!(b, call) } -/// Round packed double-precision (64-bit) floating point elements in `a` +/// Rounds packed double-precision (64-bit) floating point elements in `a` /// toward positive infinity. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_ceil_pd) @@ -431,7 +429,7 @@ pub unsafe fn _mm256_ceil_pd(a: __m256d) -> __m256d { roundpd256(a, 0x02) } -/// Round packed double-precision (64-bit) floating point elements in `a` +/// Rounds packed double-precision (64-bit) floating point elements in `a` /// toward negative infinity. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_floor_pd) @@ -443,7 +441,7 @@ pub unsafe fn _mm256_floor_pd(a: __m256d) -> __m256d { roundpd256(a, 0x01) } -/// Round packed single-precision (32-bit) floating point elements in `a` +/// Rounds packed single-precision (32-bit) floating point elements in `a` /// according to the flag `b`. The value of `b` may be as follows: /// /// - `0x00`: Round to the nearest whole number. 
@@ -470,7 +468,7 @@ pub unsafe fn _mm256_round_ps(a: __m256, b: i32) -> __m256 { constify_imm8!(b, call) } -/// Round packed single-precision (32-bit) floating point elements in `a` +/// Rounds packed single-precision (32-bit) floating point elements in `a` /// toward positive infinity. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_ceil_ps) @@ -482,7 +480,7 @@ pub unsafe fn _mm256_ceil_ps(a: __m256) -> __m256 { roundps256(a, 0x02) } -/// Round packed single-precision (32-bit) floating point elements in `a` +/// Rounds packed single-precision (32-bit) floating point elements in `a` /// toward negative infinity. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_floor_ps) @@ -494,7 +492,7 @@ pub unsafe fn _mm256_floor_ps(a: __m256) -> __m256 { roundps256(a, 0x01) } -/// Return the square root of packed single-precision (32-bit) floating point +/// Returns the square root of packed single-precision (32-bit) floating point /// elements in `a`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sqrt_ps) @@ -506,7 +504,7 @@ pub unsafe fn _mm256_sqrt_ps(a: __m256) -> __m256 { sqrtps256(a) } -/// Return the square root of packed double-precision (64-bit) floating point +/// Returns the square root of packed double-precision (64-bit) floating point /// elements in `a`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sqrt_pd) @@ -518,7 +516,7 @@ pub unsafe fn _mm256_sqrt_pd(a: __m256d) -> __m256d { sqrtpd256(a) } -/// Blend packed double-precision (64-bit) floating-point elements from +/// Blends packed double-precision (64-bit) floating-point elements from /// `a` and `b` using control mask `imm8`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_blend_pd) @@ -567,7 +565,7 @@ pub unsafe fn _mm256_blend_pd(a: __m256d, b: __m256d, imm8: i32) -> __m256d { } } -/// Blend packed single-precision (32-bit) floating-point elements from +/// Blends packed single-precision (32-bit) floating-point elements from /// `a` and `b` using control mask `imm8`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_blend_ps) @@ -630,7 +628,7 @@ pub unsafe fn _mm256_blend_ps(a: __m256, b: __m256, imm8: i32) -> __m256 { } } -/// Blend packed double-precision (64-bit) floating-point elements from +/// Blends packed double-precision (64-bit) floating-point elements from /// `a` and `b` using `c` as a mask. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_blendv_pd) @@ -642,7 +640,7 @@ pub unsafe fn _mm256_blendv_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d { vblendvpd(a, b, c) } -/// Blend packed single-precision (32-bit) floating-point elements from +/// Blends packed single-precision (32-bit) floating-point elements from /// `a` and `b` using `c` as a mask. 
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_blendv_ps) @@ -654,7 +652,7 @@ pub unsafe fn _mm256_blendv_ps(a: __m256, b: __m256, c: __m256) -> __m256 { vblendvps(a, b, c) } -/// Conditionally multiply the packed single-precision (32-bit) floating-point +/// Conditionally multiplies the packed single-precision (32-bit) floating-point /// elements in `a` and `b` using the high 4 bits in `imm8`, /// sum the four products, and conditionally return the sum /// using the low 4 bits of `imm8`. @@ -732,7 +730,7 @@ pub unsafe fn _mm256_hsub_ps(a: __m256, b: __m256) -> __m256 { vhsubps(a, b) } -/// Compute the bitwise XOR of packed double-precision (64-bit) floating-point +/// Computes the bitwise XOR of packed double-precision (64-bit) floating-point /// elements in `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_xor_pd) @@ -747,7 +745,7 @@ pub unsafe fn _mm256_xor_pd(a: __m256d, b: __m256d) -> __m256d { mem::transmute(simd_xor(a, b)) } -/// Compute the bitwise XOR of packed single-precision (32-bit) floating-point +/// Computes the bitwise XOR of packed single-precision (32-bit) floating-point /// elements in `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_xor_ps) @@ -858,7 +856,7 @@ pub const _CMP_GT_OQ: i32 = 0x1e; #[stable(feature = "simd_x86", since = "1.27.0")] pub const _CMP_TRUE_US: i32 = 0x1f; -/// Compare packed double-precision (64-bit) floating-point +/// Compares packed double-precision (64-bit) floating-point /// elements in `a` and `b` based on the comparison operand /// specified by `imm8`. /// @@ -877,7 +875,7 @@ pub unsafe fn _mm_cmp_pd(a: __m128d, b: __m128d, imm8: i32) -> __m128d { constify_imm6!(imm8, call) } -/// Compare packed double-precision (64-bit) floating-point +/// Compares packed double-precision (64-bit) floating-point /// elements in `a` and `b` based on the comparison operand /// specified by `imm8`. /// @@ -896,7 +894,7 @@ pub unsafe fn _mm256_cmp_pd(a: __m256d, b: __m256d, imm8: i32) -> __m256d { constify_imm6!(imm8, call) } -/// Compare packed single-precision (32-bit) floating-point +/// Compares packed single-precision (32-bit) floating-point /// elements in `a` and `b` based on the comparison operand /// specified by `imm8`. /// @@ -915,7 +913,7 @@ pub unsafe fn _mm_cmp_ps(a: __m128, b: __m128, imm8: i32) -> __m128 { constify_imm6!(imm8, call) } -/// Compare packed single-precision (32-bit) floating-point +/// Compares packed single-precision (32-bit) floating-point /// elements in `a` and `b` based on the comparison operand /// specified by `imm8`. /// @@ -934,10 +932,10 @@ pub unsafe fn _mm256_cmp_ps(a: __m256, b: __m256, imm8: i32) -> __m256 { constify_imm6!(imm8, call) } -/// Compare the lower double-precision (64-bit) floating-point element in +/// Compares the lower double-precision (64-bit) floating-point element in /// `a` and `b` based on the comparison operand specified by `imm8`, /// store the result in the lower element of returned vector, -/// and copy the upper element from `a` to the upper element of returned +/// and copies the upper element from `a` to the upper element of returned /// vector. 
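The comparison predicates and `_mm256_cmp_ps` above pair naturally with the blend intrinsics from the previous hunk. Below is a sketch (not from the patch) of an element-wise maximum built from them, using the three-argument `imm8` form of `_mm256_cmp_ps` as it appears in this patch (later releases switched to a const-generic parameter).

    // Illustrative sketch: element-wise max via compare-then-blend.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    mod cmp_blend_example {
        #[cfg(target_arch = "x86")]
        use core::arch::x86::*;
        #[cfg(target_arch = "x86_64")]
        use core::arch::x86_64::*;

        #[target_feature(enable = "avx")]
        pub unsafe fn select_greater(a: __m256, b: __m256) -> __m256 {
            // All-ones lanes where a > b (ordered, non-signaling), else zero.
            let mask = _mm256_cmp_ps(a, b, _CMP_GT_OQ);
            // blendv takes the second operand where the mask lane is set.
            _mm256_blendv_ps(b, a, mask)
        }
    }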
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_sd) @@ -955,10 +953,10 @@ pub unsafe fn _mm_cmp_sd(a: __m128d, b: __m128d, imm8: i32) -> __m128d { constify_imm6!(imm8, call) } -/// Compare the lower single-precision (32-bit) floating-point element in +/// Compares the lower single-precision (32-bit) floating-point element in /// `a` and `b` based on the comparison operand specified by `imm8`, /// store the result in the lower element of returned vector, -/// and copy the upper 3 packed elements from `a` to the upper elements of +/// and copies the upper 3 packed elements from `a` to the upper elements of /// returned vector. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_ss) @@ -976,7 +974,7 @@ pub unsafe fn _mm_cmp_ss(a: __m128, b: __m128, imm8: i32) -> __m128 { constify_imm6!(imm8, call) } -/// Convert packed 32-bit integers in `a` to packed double-precision (64-bit) +/// Converts packed 32-bit integers in `a` to packed double-precision (64-bit) /// floating-point elements. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepi32_pd) @@ -988,7 +986,7 @@ pub unsafe fn _mm256_cvtepi32_pd(a: __m128i) -> __m256d { simd_cast(a.as_i32x4()) } -/// Convert packed 32-bit integers in `a` to packed single-precision (32-bit) +/// Converts packed 32-bit integers in `a` to packed single-precision (32-bit) /// floating-point elements. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepi32_ps) @@ -1000,7 +998,7 @@ pub unsafe fn _mm256_cvtepi32_ps(a: __m256i) -> __m256 { vcvtdq2ps(a.as_i32x8()) } -/// Convert packed double-precision (64-bit) floating-point elements in `a` +/// Converts packed double-precision (64-bit) floating-point elements in `a` /// to packed single-precision (32-bit) floating-point elements. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtpd_ps) @@ -1012,7 +1010,7 @@ pub unsafe fn _mm256_cvtpd_ps(a: __m256d) -> __m128 { vcvtpd2ps(a) } -/// Convert packed single-precision (32-bit) floating-point elements in `a` +/// Converts packed single-precision (32-bit) floating-point elements in `a` /// to packed 32-bit integers. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtps_epi32) @@ -1024,7 +1022,7 @@ pub unsafe fn _mm256_cvtps_epi32(a: __m256) -> __m256i { mem::transmute(vcvtps2dq(a)) } -/// Convert packed single-precision (32-bit) floating-point elements in `a` +/// Converts packed single-precision (32-bit) floating-point elements in `a` /// to packed double-precision (64-bit) floating-point elements. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtps_pd) @@ -1036,7 +1034,7 @@ pub unsafe fn _mm256_cvtps_pd(a: __m128) -> __m256d { simd_cast(a) } -/// Convert packed double-precision (64-bit) floating-point elements in `a` +/// Converts packed double-precision (64-bit) floating-point elements in `a` /// to packed 32-bit integers with truncation. 
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvttpd_epi32) @@ -1048,7 +1046,7 @@ pub unsafe fn _mm256_cvttpd_epi32(a: __m256d) -> __m128i { mem::transmute(vcvttpd2dq(a)) } -/// Convert packed double-precision (64-bit) floating-point elements in `a` +/// Converts packed double-precision (64-bit) floating-point elements in `a` /// to packed 32-bit integers. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtpd_epi32) @@ -1060,7 +1058,7 @@ pub unsafe fn _mm256_cvtpd_epi32(a: __m256d) -> __m128i { mem::transmute(vcvtpd2dq(a)) } -/// Convert packed single-precision (32-bit) floating-point elements in `a` +/// Converts packed single-precision (32-bit) floating-point elements in `a` /// to packed 32-bit integers with truncation. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvttps_epi32) @@ -1072,7 +1070,7 @@ pub unsafe fn _mm256_cvttps_epi32(a: __m256) -> __m256i { mem::transmute(vcvttps2dq(a)) } -/// Extract 128 bits (composed of 4 packed single-precision (32-bit) +/// Extracts 128 bits (composed of 4 packed single-precision (32-bit) /// floating-point elements) from `a`, selected with `imm8`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_extractf128_ps) @@ -1091,7 +1089,7 @@ pub unsafe fn _mm256_extractf128_ps(a: __m256, imm8: i32) -> __m128 { } } -/// Extract 128 bits (composed of 2 packed double-precision (64-bit) +/// Extracts 128 bits (composed of 2 packed double-precision (64-bit) /// floating-point elements) from `a`, selected with `imm8`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_extractf128_pd) @@ -1110,7 +1108,7 @@ pub unsafe fn _mm256_extractf128_pd(a: __m256d, imm8: i32) -> __m128d { } } -/// Extract 128 bits (composed of integer data) from `a`, selected with `imm8`. +/// Extracts 128 bits (composed of integer data) from `a`, selected with `imm8`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_extractf128_si256) #[inline] @@ -1130,7 +1128,7 @@ pub unsafe fn _mm256_extractf128_si256(a: __m256i, imm8: i32) -> __m128i { mem::transmute(dst) } -/// Zero the contents of all XMM or YMM registers. +/// Zeroes the contents of all XMM or YMM registers. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_zeroall) #[inline] @@ -1141,7 +1139,7 @@ pub unsafe fn _mm256_zeroall() { vzeroall() } -/// Zero the upper 128 bits of all YMM registers; +/// Zeroes the upper 128 bits of all YMM registers; /// the lower 128-bits of the registers are unmodified. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_zeroupper) @@ -1153,7 +1151,7 @@ pub unsafe fn _mm256_zeroupper() { vzeroupper() } -/// Shuffle single-precision (32-bit) floating-point elements in `a` +/// Shuffles single-precision (32-bit) floating-point elements in `a` /// within 128-bit lanes using the control in `b`. 
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permutevar_ps) @@ -1165,7 +1163,7 @@ pub unsafe fn _mm256_permutevar_ps(a: __m256, b: __m256i) -> __m256 { vpermilps256(a, b.as_i32x8()) } -/// Shuffle single-precision (32-bit) floating-point elements in `a` +/// Shuffles single-precision (32-bit) floating-point elements in `a` /// using the control in `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_permutevar_ps) @@ -1177,7 +1175,7 @@ pub unsafe fn _mm_permutevar_ps(a: __m128, b: __m128i) -> __m128 { vpermilps(a, b.as_i32x4()) } -/// Shuffle single-precision (32-bit) floating-point elements in `a` +/// Shuffles single-precision (32-bit) floating-point elements in `a` /// within 128-bit lanes using the control in `imm8`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permute_ps) @@ -1235,7 +1233,7 @@ pub unsafe fn _mm256_permute_ps(a: __m256, imm8: i32) -> __m256 { } } -/// Shuffle single-precision (32-bit) floating-point elements in `a` +/// Shuffles single-precision (32-bit) floating-point elements in `a` /// using the control in `imm8`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_permute_ps) @@ -1289,7 +1287,7 @@ pub unsafe fn _mm_permute_ps(a: __m128, imm8: i32) -> __m128 { } } -/// Shuffle double-precision (64-bit) floating-point elements in `a` +/// Shuffles double-precision (64-bit) floating-point elements in `a` /// within 256-bit lanes using the control in `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permutevar_pd) @@ -1301,7 +1299,7 @@ pub unsafe fn _mm256_permutevar_pd(a: __m256d, b: __m256i) -> __m256d { vpermilpd256(a, b.as_i64x4()) } -/// Shuffle double-precision (64-bit) floating-point elements in `a` +/// Shuffles double-precision (64-bit) floating-point elements in `a` /// using the control in `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_permutevar_pd) @@ -1313,7 +1311,7 @@ pub unsafe fn _mm_permutevar_pd(a: __m128d, b: __m128i) -> __m128d { vpermilpd(a, b.as_i64x2()) } -/// Shuffle double-precision (64-bit) floating-point elements in `a` +/// Shuffles double-precision (64-bit) floating-point elements in `a` /// within 128-bit lanes using the control in `imm8`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permute_pd) @@ -1359,7 +1357,7 @@ pub unsafe fn _mm256_permute_pd(a: __m256d, imm8: i32) -> __m256d { } } -/// Shuffle double-precision (64-bit) floating-point elements in `a` +/// Shuffles double-precision (64-bit) floating-point elements in `a` /// using the control in `imm8`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_permute_pd) @@ -1389,7 +1387,7 @@ pub unsafe fn _mm_permute_pd(a: __m128d, imm8: i32) -> __m128d { } } -/// Shuffle 256-bits (composed of 8 packed single-precision (32-bit) +/// Shuffles 256 bits (composed of 8 packed single-precision (32-bit) /// floating-point elements) selected by `imm8` from `a` and `b`. 
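// A minimal usage sketch, assuming the `imm8: i32` signature shown in this patch:
// `_mm_permute_ps` with control `0b00_01_10_11` reverses the four 32-bit lanes of `a`.
#[cfg(all(target_arch = "x86_64", target_feature = "avx"))]
unsafe fn reverse_lanes(a: std::arch::x86_64::__m128) -> std::arch::x86_64::__m128 {
    use std::arch::x86_64::_mm_permute_ps;
    _mm_permute_ps(a, 0b00_01_10_11) // result lane i takes source lane (3 - i)
}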
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permute2f128_ps) @@ -1407,7 +1405,7 @@ pub unsafe fn _mm256_permute2f128_ps(a: __m256, b: __m256, imm8: i32) -> __m256 constify_imm8!(imm8, call) } -/// Shuffle 256-bits (composed of 4 packed double-precision (64-bit) +/// Shuffles 256 bits (composed of 4 packed double-precision (64-bit) /// floating-point elements) selected by `imm8` from `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permute2f128_pd) @@ -1425,7 +1423,7 @@ pub unsafe fn _mm256_permute2f128_pd(a: __m256d, b: __m256d, imm8: i32) -> __m25 constify_imm8!(imm8, call) } -/// Shuffle 258-bits (composed of integer data) selected by `imm8` +/// Shuffles 256 bits (composed of integer data) selected by `imm8` /// from `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permute2f128_si256) @@ -1446,7 +1444,7 @@ pub unsafe fn _mm256_permute2f128_si256(a: __m256i, b: __m256i, imm8: i32) -> __ mem::transmute(r) } -/// Broadcast a single-precision (32-bit) floating-point element from memory +/// Broadcasts a single-precision (32-bit) floating-point element from memory /// to all elements of the returned vector. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_broadcast_ss) @@ -1459,7 +1457,7 @@ pub unsafe fn _mm256_broadcast_ss(f: &f32) -> __m256 { _mm256_set1_ps(*f) } -/// Broadcast a single-precision (32-bit) floating-point element from memory +/// Broadcasts a single-precision (32-bit) floating-point element from memory /// to all elements of the returned vector. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_broadcast_ss) @@ -1472,7 +1470,7 @@ pub unsafe fn _mm_broadcast_ss(f: &f32) -> __m128 { _mm_set1_ps(*f) } -/// Broadcast a double-precision (64-bit) floating-point element from memory +/// Broadcasts a double-precision (64-bit) floating-point element from memory /// to all elements of the returned vector. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_broadcast_sd) @@ -1485,7 +1483,7 @@ pub unsafe fn _mm256_broadcast_sd(f: &f64) -> __m256d { _mm256_set1_pd(*f) } -/// Broadcast 128 bits from memory (composed of 4 packed single-precision +/// Broadcasts 128 bits from memory (composed of 4 packed single-precision /// (32-bit) floating-point elements) to all elements of the returned vector. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_broadcast_ps) @@ -1497,7 +1495,7 @@ pub unsafe fn _mm256_broadcast_ps(a: &__m128) -> __m256 { vbroadcastf128ps256(a) } -/// Broadcast 128 bits from memory (composed of 2 packed double-precision +/// Broadcasts 128 bits from memory (composed of 2 packed double-precision /// (64-bit) floating-point elements) to all elements of the returned vector. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_broadcast_pd) @@ -1509,7 +1507,7 @@ pub unsafe fn _mm256_broadcast_pd(a: &__m128d) -> __m256d { vbroadcastf128pd256(a) } -/// Copy `a` to result, then insert 128 bits (composed of 4 packed +/// Copies `a` to result, then inserts 128 bits (composed of 4 packed /// single-precision (32-bit) floating-point elements) from `b` into result /// at the location specified by `imm8`.
/// @@ -1530,7 +1528,7 @@ pub unsafe fn _mm256_insertf128_ps(a: __m256, b: __m128, imm8: i32) -> __m256 { } } -/// Copy `a` to result, then insert 128 bits (composed of 2 packed +/// Copies `a` to result, then inserts 128 bits (composed of 2 packed /// double-precision (64-bit) floating-point elements) from `b` into result /// at the location specified by `imm8`. /// @@ -1550,7 +1548,7 @@ pub unsafe fn _mm256_insertf128_pd(a: __m256d, b: __m128d, imm8: i32) -> __m256d } } -/// Copy `a` to result, then insert 128 bits from `b` into result +/// Copies `a` to result, then inserts 128 bits from `b` into result /// at the location specified by `imm8`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_insertf128_si256) @@ -1571,7 +1569,7 @@ pub unsafe fn _mm256_insertf128_si256(a: __m256i, b: __m128i, imm8: i32) -> __m2 mem::transmute(dst) } -/// Copy `a` to result, and insert the 8-bit integer `i` into result +/// Copies `a` to result, and inserts the 8-bit integer `i` into result /// at the location specified by `index`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_insert_epi8) @@ -1584,7 +1582,7 @@ pub unsafe fn _mm256_insert_epi8(a: __m256i, i: i8, index: i32) -> __m256i { mem::transmute(simd_insert(a.as_i8x32(), (index as u32) & 31, i)) } -/// Copy `a` to result, and insert the 16-bit integer `i` into result +/// Copies `a` to result, and inserts the 16-bit integer `i` into result /// at the location specified by `index`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_insert_epi16) @@ -1597,7 +1595,7 @@ pub unsafe fn _mm256_insert_epi16(a: __m256i, i: i16, index: i32) -> __m256i { mem::transmute(simd_insert(a.as_i16x16(), (index as u32) & 15, i)) } -/// Copy `a` to result, and insert the 32-bit integer `i` into result +/// Copies `a` to result, and inserts the 32-bit integer `i` into result /// at the location specified by `index`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_insert_epi32) @@ -1610,7 +1608,7 @@ pub unsafe fn _mm256_insert_epi32(a: __m256i, i: i32, index: i32) -> __m256i { mem::transmute(simd_insert(a.as_i32x8(), (index as u32) & 7, i)) } -/// Load 256-bits (composed of 4 packed double-precision (64-bit) +/// Loads 256-bits (composed of 4 packed double-precision (64-bit) /// floating-point elements) from memory into result. /// `mem_addr` must be aligned on a 32-byte boundary or a /// general-protection exception may be generated. @@ -1625,7 +1623,7 @@ pub unsafe fn _mm256_load_pd(mem_addr: *const f64) -> __m256d { *(mem_addr as *const __m256d) } -/// Store 256-bits (composed of 4 packed double-precision (64-bit) +/// Stores 256-bits (composed of 4 packed double-precision (64-bit) /// floating-point elements) from `a` into memory. /// `mem_addr` must be aligned on a 32-byte boundary or a /// general-protection exception may be generated. @@ -1640,7 +1638,7 @@ pub unsafe fn _mm256_store_pd(mem_addr: *const f64, a: __m256d) { *(mem_addr as *mut __m256d) = a; } -/// Load 256-bits (composed of 8 packed single-precision (32-bit) +/// Loads 256-bits (composed of 8 packed single-precision (32-bit) /// floating-point elements) from memory into result. /// `mem_addr` must be aligned on a 32-byte boundary or a /// general-protection exception may be generated. 
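// A minimal usage sketch, assuming an AVX-capable x86_64 target: round-trip
// eight `f32`s through the unaligned load/store intrinsics. The aligned
// `_mm256_load_ps`/`_mm256_store_ps` pair additionally requires a 32-byte
// aligned buffer (e.g. via `#[repr(align(32))]`); `_mm256_add_ps` is assumed
// from the same AVX module.
#[cfg(all(target_arch = "x86_64", target_feature = "avx"))]
unsafe fn double_in_place(data: &mut [f32; 8]) {
    use std::arch::x86_64::{_mm256_add_ps, _mm256_loadu_ps, _mm256_storeu_ps};
    let v = _mm256_loadu_ps(data.as_ptr());
    _mm256_storeu_ps(data.as_mut_ptr(), _mm256_add_ps(v, v)); // every element doubled
}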
@@ -1655,7 +1653,7 @@ pub unsafe fn _mm256_load_ps(mem_addr: *const f32) -> __m256 { *(mem_addr as *const __m256) } -/// Store 256-bits (composed of 8 packed single-precision (32-bit) +/// Stores 256-bits (composed of 8 packed single-precision (32-bit) /// floating-point elements) from `a` into memory. /// `mem_addr` must be aligned on a 32-byte boundary or a /// general-protection exception may be generated. @@ -1670,7 +1668,7 @@ pub unsafe fn _mm256_store_ps(mem_addr: *const f32, a: __m256) { *(mem_addr as *mut __m256) = a; } -/// Load 256-bits (composed of 4 packed double-precision (64-bit) +/// Loads 256-bits (composed of 4 packed double-precision (64-bit) /// floating-point elements) from memory into result. /// `mem_addr` does not need to be aligned on any particular boundary. /// @@ -1689,7 +1687,7 @@ pub unsafe fn _mm256_loadu_pd(mem_addr: *const f64) -> __m256d { dst } -/// Store 256-bits (composed of 4 packed double-precision (64-bit) +/// Stores 256-bits (composed of 4 packed double-precision (64-bit) /// floating-point elements) from `a` into memory. /// `mem_addr` does not need to be aligned on any particular boundary. /// @@ -1702,7 +1700,7 @@ pub unsafe fn _mm256_storeu_pd(mem_addr: *mut f64, a: __m256d) { storeupd256(mem_addr, a); } -/// Load 256-bits (composed of 8 packed single-precision (32-bit) +/// Loads 256-bits (composed of 8 packed single-precision (32-bit) /// floating-point elements) from memory into result. /// `mem_addr` does not need to be aligned on any particular boundary. /// @@ -1721,7 +1719,7 @@ pub unsafe fn _mm256_loadu_ps(mem_addr: *const f32) -> __m256 { dst } -/// Store 256-bits (composed of 8 packed single-precision (32-bit) +/// Stores 256-bits (composed of 8 packed single-precision (32-bit) /// floating-point elements) from `a` into memory. /// `mem_addr` does not need to be aligned on any particular boundary. /// @@ -1734,7 +1732,7 @@ pub unsafe fn _mm256_storeu_ps(mem_addr: *mut f32, a: __m256) { storeups256(mem_addr, a); } -/// Load 256-bits of integer data from memory into result. +/// Loads 256-bits of integer data from memory into result. /// `mem_addr` must be aligned on a 32-byte boundary or a /// general-protection exception may be generated. /// @@ -1747,7 +1745,7 @@ pub unsafe fn _mm256_load_si256(mem_addr: *const __m256i) -> __m256i { *mem_addr } -/// Store 256-bits of integer data from `a` into memory. +/// Stores 256-bits of integer data from `a` into memory. /// `mem_addr` must be aligned on a 32-byte boundary or a /// general-protection exception may be generated. /// @@ -1760,7 +1758,7 @@ pub unsafe fn _mm256_store_si256(mem_addr: *mut __m256i, a: __m256i) { *mem_addr = a; } -/// Load 256-bits of integer data from memory into result. +/// Loads 256-bits of integer data from memory into result. /// `mem_addr` does not need to be aligned on any particular boundary. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_loadu_si256) @@ -1778,7 +1776,7 @@ pub unsafe fn _mm256_loadu_si256(mem_addr: *const __m256i) -> __m256i { dst } -/// Store 256-bits of integer data from `a` into memory. +/// Stores 256-bits of integer data from `a` into memory. /// `mem_addr` does not need to be aligned on any particular boundary. 
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_storeu_si256) @@ -1790,7 +1788,7 @@ pub unsafe fn _mm256_storeu_si256(mem_addr: *mut __m256i, a: __m256i) { storeudq256(mem_addr as *mut i8, a.as_i8x32()); } -/// Load packed double-precision (64-bit) floating-point elements from memory +/// Loads packed double-precision (64-bit) floating-point elements from memory /// into result using `mask` (elements are zeroed out when the high bit of the /// corresponding element is not set). /// @@ -1803,7 +1801,7 @@ pub unsafe fn _mm256_maskload_pd(mem_addr: *const f64, mask: __m256i) -> __m256d maskloadpd256(mem_addr as *const i8, mask.as_i64x4()) } -/// Store packed double-precision (64-bit) floating-point elements from `a` +/// Stores packed double-precision (64-bit) floating-point elements from `a` /// into memory using `mask`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskstore_pd) @@ -1815,7 +1813,7 @@ pub unsafe fn _mm256_maskstore_pd(mem_addr: *mut f64, mask: __m256i, a: __m256d) maskstorepd256(mem_addr as *mut i8, mask.as_i64x4(), a); } -/// Load packed double-precision (64-bit) floating-point elements from memory +/// Loads packed double-precision (64-bit) floating-point elements from memory /// into result using `mask` (elements are zeroed out when the high bit of the /// corresponding element is not set). /// @@ -1828,7 +1826,7 @@ pub unsafe fn _mm_maskload_pd(mem_addr: *const f64, mask: __m128i) -> __m128d { maskloadpd(mem_addr as *const i8, mask.as_i64x2()) } -/// Store packed double-precision (64-bit) floating-point elements from `a` +/// Stores packed double-precision (64-bit) floating-point elements from `a` /// into memory using `mask`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskstore_pd) @@ -1840,7 +1838,7 @@ pub unsafe fn _mm_maskstore_pd(mem_addr: *mut f64, mask: __m128i, a: __m128d) { maskstorepd(mem_addr as *mut i8, mask.as_i64x2(), a); } -/// Load packed single-precision (32-bit) floating-point elements from memory +/// Loads packed single-precision (32-bit) floating-point elements from memory /// into result using `mask` (elements are zeroed out when the high bit of the /// corresponding element is not set). /// @@ -1853,7 +1851,7 @@ pub unsafe fn _mm256_maskload_ps(mem_addr: *const f32, mask: __m256i) -> __m256 maskloadps256(mem_addr as *const i8, mask.as_i32x8()) } -/// Store packed single-precision (32-bit) floating-point elements from `a` +/// Stores packed single-precision (32-bit) floating-point elements from `a` /// into memory using `mask`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskstore_ps) @@ -1865,7 +1863,7 @@ pub unsafe fn _mm256_maskstore_ps(mem_addr: *mut f32, mask: __m256i, a: __m256) maskstoreps256(mem_addr as *mut i8, mask.as_i32x8(), a); } -/// Load packed single-precision (32-bit) floating-point elements from memory +/// Loads packed single-precision (32-bit) floating-point elements from memory /// into result using `mask` (elements are zeroed out when the high bit of the /// corresponding element is not set). 
/// @@ -1878,7 +1876,7 @@ pub unsafe fn _mm_maskload_ps(mem_addr: *const f32, mask: __m128i) -> __m128 { maskloadps(mem_addr as *const i8, mask.as_i32x4()) } -/// Store packed single-precision (32-bit) floating-point elements from `a` +/// Stores packed single-precision (32-bit) floating-point elements from `a` /// into memory using `mask`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskstore_ps) @@ -1891,7 +1889,7 @@ pub unsafe fn _mm_maskstore_ps(mem_addr: *mut f32, mask: __m128i, a: __m128) { } /// Duplicate odd-indexed single-precision (32-bit) floating-point elements -/// from `a`, and return the results. +/// from `a`, and returns the results. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_movehdup_ps) #[inline] @@ -1903,7 +1901,7 @@ pub unsafe fn _mm256_movehdup_ps(a: __m256) -> __m256 { } /// Duplicate even-indexed single-precision (32-bit) floating-point elements -/// from `a`, and return the results. +/// from `a`, and returns the results. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_moveldup_ps) #[inline] @@ -1915,7 +1913,7 @@ pub unsafe fn _mm256_moveldup_ps(a: __m256) -> __m256 { } /// Duplicate even-indexed double-precision (64-bit) floating-point elements -/// from "a", and return the results. +/// from "a", and returns the results. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_movedup_pd) #[inline] @@ -1926,7 +1924,7 @@ pub unsafe fn _mm256_movedup_pd(a: __m256d) -> __m256d { simd_shuffle4(a, a, [0, 0, 2, 2]) } -/// Load 256-bits of integer data from unaligned memory into result. +/// Loads 256-bits of integer data from unaligned memory into result. /// This intrinsic may perform better than `_mm256_loadu_si256` when the /// data crosses a cache line boundary. /// @@ -1981,8 +1979,8 @@ pub unsafe fn _mm256_stream_ps(mem_addr: *mut f32, a: __m256) { intrinsics::nontemporal_store(mem_addr as *mut __m256, a); } -/// Compute the approximate reciprocal of packed single-precision (32-bit) -/// floating-point elements in `a`, and return the results. The maximum +/// Computes the approximate reciprocal of packed single-precision (32-bit) +/// floating-point elements in `a`, and returns the results. The maximum /// relative error for this approximation is less than 1.5*2^-12. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_rcp_ps) @@ -1994,8 +1992,8 @@ pub unsafe fn _mm256_rcp_ps(a: __m256) -> __m256 { vrcpps(a) } -/// Compute the approximate reciprocal square root of packed single-precision -/// (32-bit) floating-point elements in `a`, and return the results. +/// Computes the approximate reciprocal square root of packed single-precision +/// (32-bit) floating-point elements in `a`, and returns the results. /// The maximum relative error for this approximation is less than 1.5*2^-12. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_rsqrt_ps) @@ -2007,7 +2005,7 @@ pub unsafe fn _mm256_rsqrt_ps(a: __m256) -> __m256 { vrsqrtps(a) } -/// Unpack and interleave double-precision (64-bit) floating-point elements +/// Unpacks and interleave double-precision (64-bit) floating-point elements /// from the high half of each 128-bit lane in `a` and `b`. 
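// A minimal usage sketch, assuming an AVX-capable x86_64 target: load only the
// first two of four `f64` lanes with `_mm256_maskload_pd`; lanes whose mask
// element has the high bit clear are zeroed and are not read from memory.
#[cfg(all(target_arch = "x86_64", target_feature = "avx"))]
unsafe fn load_first_two(data: *const f64) -> std::arch::x86_64::__m256d {
    use std::arch::x86_64::{_mm256_maskload_pd, _mm256_setr_epi64x};
    let mask = _mm256_setr_epi64x(-1, -1, 0, 0); // -1 has the high bit set => lane enabled
    _mm256_maskload_pd(data, mask)
}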
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_unpackhi_pd) @@ -2019,7 +2017,7 @@ pub unsafe fn _mm256_unpackhi_pd(a: __m256d, b: __m256d) -> __m256d { simd_shuffle4(a, b, [1, 5, 3, 7]) } -/// Unpack and interleave single-precision (32-bit) floating-point elements +/// Unpacks and interleave single-precision (32-bit) floating-point elements /// from the high half of each 128-bit lane in `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_unpackhi_ps) @@ -2031,7 +2029,7 @@ pub unsafe fn _mm256_unpackhi_ps(a: __m256, b: __m256) -> __m256 { simd_shuffle8(a, b, [2, 10, 3, 11, 6, 14, 7, 15]) } -/// Unpack and interleave double-precision (64-bit) floating-point elements +/// Unpacks and interleave double-precision (64-bit) floating-point elements /// from the low half of each 128-bit lane in `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_unpacklo_pd) @@ -2043,7 +2041,7 @@ pub unsafe fn _mm256_unpacklo_pd(a: __m256d, b: __m256d) -> __m256d { simd_shuffle4(a, b, [0, 4, 2, 6]) } -/// Unpack and interleave single-precision (32-bit) floating-point elements +/// Unpacks and interleave single-precision (32-bit) floating-point elements /// from the low half of each 128-bit lane in `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_unpacklo_ps) @@ -2055,9 +2053,9 @@ pub unsafe fn _mm256_unpacklo_ps(a: __m256, b: __m256) -> __m256 { simd_shuffle8(a, b, [0, 8, 1, 9, 4, 12, 5, 13]) } -/// Compute the bitwise AND of 256 bits (representing integer data) in `a` and +/// Computes the bitwise AND of 256 bits (representing integer data) in `a` and /// `b`, and set `ZF` to 1 if the result is zero, otherwise set `ZF` to 0. -/// Compute the bitwise NOT of `a` and then AND with `b`, and set `CF` to 1 if +/// Computes the bitwise NOT of `a` and then AND with `b`, and set `CF` to 1 if /// the result is zero, otherwise set `CF` to 0. Return the `ZF` value. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_testz_si256) @@ -2069,9 +2067,9 @@ pub unsafe fn _mm256_testz_si256(a: __m256i, b: __m256i) -> i32 { ptestz256(a.as_i64x4(), b.as_i64x4()) } -/// Compute the bitwise AND of 256 bits (representing integer data) in `a` and +/// Computes the bitwise AND of 256 bits (representing integer data) in `a` and /// `b`, and set `ZF` to 1 if the result is zero, otherwise set `ZF` to 0. -/// Compute the bitwise NOT of `a` and then AND with `b`, and set `CF` to 1 if +/// Computes the bitwise NOT of `a` and then AND with `b`, and set `CF` to 1 if /// the result is zero, otherwise set `CF` to 0. Return the `CF` value. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_testc_si256) @@ -2083,9 +2081,9 @@ pub unsafe fn _mm256_testc_si256(a: __m256i, b: __m256i) -> i32 { ptestc256(a.as_i64x4(), b.as_i64x4()) } -/// Compute the bitwise AND of 256 bits (representing integer data) in `a` and +/// Computes the bitwise AND of 256 bits (representing integer data) in `a` and /// `b`, and set `ZF` to 1 if the result is zero, otherwise set `ZF` to 0. -/// Compute the bitwise NOT of `a` and then AND with `b`, and set `CF` to 1 if +/// Computes the bitwise NOT of `a` and then AND with `b`, and set `CF` to 1 if /// the result is zero, otherwise set `CF` to 0. 
Return 1 if both the `ZF` and /// `CF` values are zero, otherwise return 0. /// @@ -2098,7 +2096,7 @@ pub unsafe fn _mm256_testnzc_si256(a: __m256i, b: __m256i) -> i32 { ptestnzc256(a.as_i64x4(), b.as_i64x4()) } -/// Compute the bitwise AND of 256 bits (representing double-precision (64-bit) +/// Computes the bitwise AND of 256 bits (representing double-precision (64-bit) /// floating-point elements) in `a` and `b`, producing an intermediate 256-bit /// value, and set `ZF` to 1 if the sign bit of each 64-bit element in the /// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise @@ -2115,7 +2113,7 @@ pub unsafe fn _mm256_testz_pd(a: __m256d, b: __m256d) -> i32 { vtestzpd256(a, b) } -/// Compute the bitwise AND of 256 bits (representing double-precision (64-bit) +/// Computes the bitwise AND of 256 bits (representing double-precision (64-bit) /// floating-point elements) in `a` and `b`, producing an intermediate 256-bit /// value, and set `ZF` to 1 if the sign bit of each 64-bit element in the /// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise @@ -2132,7 +2130,7 @@ pub unsafe fn _mm256_testc_pd(a: __m256d, b: __m256d) -> i32 { vtestcpd256(a, b) } -/// Compute the bitwise AND of 256 bits (representing double-precision (64-bit) +/// Computes the bitwise AND of 256 bits (representing double-precision (64-bit) /// floating-point elements) in `a` and `b`, producing an intermediate 256-bit /// value, and set `ZF` to 1 if the sign bit of each 64-bit element in the /// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise @@ -2150,7 +2148,7 @@ pub unsafe fn _mm256_testnzc_pd(a: __m256d, b: __m256d) -> i32 { vtestnzcpd256(a, b) } -/// Compute the bitwise AND of 128 bits (representing double-precision (64-bit) +/// Computes the bitwise AND of 128 bits (representing double-precision (64-bit) /// floating-point elements) in `a` and `b`, producing an intermediate 128-bit /// value, and set `ZF` to 1 if the sign bit of each 64-bit element in the /// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise @@ -2167,7 +2165,7 @@ pub unsafe fn _mm_testz_pd(a: __m128d, b: __m128d) -> i32 { vtestzpd(a, b) } -/// Compute the bitwise AND of 128 bits (representing double-precision (64-bit) +/// Computes the bitwise AND of 128 bits (representing double-precision (64-bit) /// floating-point elements) in `a` and `b`, producing an intermediate 128-bit /// value, and set `ZF` to 1 if the sign bit of each 64-bit element in the /// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise @@ -2184,7 +2182,7 @@ pub unsafe fn _mm_testc_pd(a: __m128d, b: __m128d) -> i32 { vtestcpd(a, b) } -/// Compute the bitwise AND of 128 bits (representing double-precision (64-bit) +/// Computes the bitwise AND of 128 bits (representing double-precision (64-bit) /// floating-point elements) in `a` and `b`, producing an intermediate 128-bit /// value, and set `ZF` to 1 if the sign bit of each 64-bit element in the /// intermediate value is zero, otherwise set `ZF` to 0. 
Compute the bitwise @@ -2202,7 +2200,7 @@ pub unsafe fn _mm_testnzc_pd(a: __m128d, b: __m128d) -> i32 { vtestnzcpd(a, b) } -/// Compute the bitwise AND of 256 bits (representing single-precision (32-bit) +/// Computes the bitwise AND of 256 bits (representing single-precision (32-bit) /// floating-point elements) in `a` and `b`, producing an intermediate 256-bit /// value, and set `ZF` to 1 if the sign bit of each 32-bit element in the /// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise @@ -2219,7 +2217,7 @@ pub unsafe fn _mm256_testz_ps(a: __m256, b: __m256) -> i32 { vtestzps256(a, b) } -/// Compute the bitwise AND of 256 bits (representing single-precision (32-bit) +/// Computes the bitwise AND of 256 bits (representing single-precision (32-bit) /// floating-point elements) in `a` and `b`, producing an intermediate 256-bit /// value, and set `ZF` to 1 if the sign bit of each 32-bit element in the /// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise @@ -2236,7 +2234,7 @@ pub unsafe fn _mm256_testc_ps(a: __m256, b: __m256) -> i32 { vtestcps256(a, b) } -/// Compute the bitwise AND of 256 bits (representing single-precision (32-bit) +/// Computes the bitwise AND of 256 bits (representing single-precision (32-bit) /// floating-point elements) in `a` and `b`, producing an intermediate 256-bit /// value, and set `ZF` to 1 if the sign bit of each 32-bit element in the /// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise @@ -2254,7 +2252,7 @@ pub unsafe fn _mm256_testnzc_ps(a: __m256, b: __m256) -> i32 { vtestnzcps256(a, b) } -/// Compute the bitwise AND of 128 bits (representing single-precision (32-bit) +/// Computes the bitwise AND of 128 bits (representing single-precision (32-bit) /// floating-point elements) in `a` and `b`, producing an intermediate 128-bit /// value, and set `ZF` to 1 if the sign bit of each 32-bit element in the /// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise @@ -2271,7 +2269,7 @@ pub unsafe fn _mm_testz_ps(a: __m128, b: __m128) -> i32 { vtestzps(a, b) } -/// Compute the bitwise AND of 128 bits (representing single-precision (32-bit) +/// Computes the bitwise AND of 128 bits (representing single-precision (32-bit) /// floating-point elements) in `a` and `b`, producing an intermediate 128-bit /// value, and set `ZF` to 1 if the sign bit of each 32-bit element in the /// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise @@ -2288,7 +2286,7 @@ pub unsafe fn _mm_testc_ps(a: __m128, b: __m128) -> i32 { vtestcps(a, b) } -/// Compute the bitwise AND of 128 bits (representing single-precision (32-bit) +/// Computes the bitwise AND of 128 bits (representing single-precision (32-bit) /// floating-point elements) in `a` and `b`, producing an intermediate 128-bit /// value, and set `ZF` to 1 if the sign bit of each 32-bit element in the /// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise @@ -2306,7 +2304,7 @@ pub unsafe fn _mm_testnzc_ps(a: __m128, b: __m128) -> i32 { vtestnzcps(a, b) } -/// Set each bit of the returned mask based on the most significant bit of the +/// Sets each bit of the returned mask based on the most significant bit of the /// corresponding packed double-precision (64-bit) floating-point element in /// `a`. 
/// @@ -2319,7 +2317,7 @@ pub unsafe fn _mm256_movemask_pd(a: __m256d) -> i32 { movmskpd256(a) } -/// Set each bit of the returned mask based on the most significant bit of the +/// Sets each bit of the returned mask based on the most significant bit of the /// corresponding packed single-precision (32-bit) floating-point element in /// `a`. /// @@ -2332,7 +2330,7 @@ pub unsafe fn _mm256_movemask_ps(a: __m256) -> i32 { movmskps256(a) } -/// Return vector of type __m256d with all elements set to zero. +/// Returns vector of type __m256d with all elements set to zero. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_setzero_pd) #[inline] @@ -2343,7 +2341,7 @@ pub unsafe fn _mm256_setzero_pd() -> __m256d { _mm256_set1_pd(0.0) } -/// Return vector of type __m256 with all elements set to zero. +/// Returns vector of type __m256 with all elements set to zero. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_setzero_ps) #[inline] @@ -2354,7 +2352,7 @@ pub unsafe fn _mm256_setzero_ps() -> __m256 { _mm256_set1_ps(0.0) } -/// Return vector of type __m256i with all elements set to zero. +/// Returns vector of type __m256i with all elements set to zero. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_setzero_si256) #[inline] @@ -2365,7 +2363,7 @@ pub unsafe fn _mm256_setzero_si256() -> __m256i { _mm256_set1_epi8(0) } -/// Set packed double-precision (64-bit) floating-point elements in returned +/// Sets packed double-precision (64-bit) floating-point elements in returned /// vector with the supplied values. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_set_pd) @@ -2378,7 +2376,7 @@ pub unsafe fn _mm256_set_pd(a: f64, b: f64, c: f64, d: f64) -> __m256d { _mm256_setr_pd(d, c, b, a) } -/// Set packed single-precision (32-bit) floating-point elements in returned +/// Sets packed single-precision (32-bit) floating-point elements in returned /// vector with the supplied values. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_set_ps) @@ -2399,7 +2397,7 @@ pub unsafe fn _mm256_set_ps( _mm256_setr_ps(h, g, f, e, d, c, b, a) } -/// Set packed 8-bit integers in returned vector with the supplied values in +/// Sets packed 8-bit integers in returned vector with the supplied values in /// reverse order. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_set_epi8) @@ -2450,7 +2448,7 @@ pub unsafe fn _mm256_set_epi8( ) } -/// Set packed 16-bit integers in returned vector with the supplied values. +/// Sets packed 16-bit integers in returned vector with the supplied values. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_set_epi16) #[inline] @@ -2484,7 +2482,7 @@ pub unsafe fn _mm256_set_epi16( ) } -/// Set packed 32-bit integers in returned vector with the supplied values. +/// Sets packed 32-bit integers in returned vector with the supplied values. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_set_epi32) #[inline] @@ -2504,7 +2502,7 @@ pub unsafe fn _mm256_set_epi32( _mm256_setr_epi32(e7, e6, e5, e4, e3, e2, e1, e0) } -/// Set packed 64-bit integers in returned vector with the supplied values. 
+/// Sets packed 64-bit integers in returned vector with the supplied values. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_set_epi64x) #[inline] @@ -2515,7 +2513,7 @@ pub unsafe fn _mm256_set_epi64x(a: i64, b: i64, c: i64, d: i64) -> __m256i { _mm256_setr_epi64x(d, c, b, a) } -/// Set packed double-precision (64-bit) floating-point elements in returned +/// Sets packed double-precision (64-bit) floating-point elements in returned /// vector with the supplied values in reverse order. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_setr_pd) @@ -2527,7 +2525,7 @@ pub unsafe fn _mm256_setr_pd(a: f64, b: f64, c: f64, d: f64) -> __m256d { __m256d(a, b, c, d) } -/// Set packed single-precision (32-bit) floating-point elements in returned +/// Sets packed single-precision (32-bit) floating-point elements in returned /// vector with the supplied values in reverse order. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_setr_ps) @@ -2548,7 +2546,7 @@ pub unsafe fn _mm256_setr_ps( __m256(a, b, c, d, e, f, g, h) } -/// Set packed 8-bit integers in returned vector with the supplied values in +/// Sets packed 8-bit integers in returned vector with the supplied values in /// reverse order. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_setr_epi8) @@ -2599,7 +2597,7 @@ pub unsafe fn _mm256_setr_epi8( )) } -/// Set packed 16-bit integers in returned vector with the supplied values in +/// Sets packed 16-bit integers in returned vector with the supplied values in /// reverse order. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_setr_epi16) @@ -2634,7 +2632,7 @@ pub unsafe fn _mm256_setr_epi16( )) } -/// Set packed 32-bit integers in returned vector with the supplied values in +/// Sets packed 32-bit integers in returned vector with the supplied values in /// reverse order. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_setr_epi32) @@ -2655,7 +2653,7 @@ pub unsafe fn _mm256_setr_epi32( mem::transmute(i32x8::new(e0, e1, e2, e3, e4, e5, e6, e7)) } -/// Set packed 64-bit integers in returned vector with the supplied values in +/// Sets packed 64-bit integers in returned vector with the supplied values in /// reverse order. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_setr_epi64x) @@ -2667,7 +2665,7 @@ pub unsafe fn _mm256_setr_epi64x(a: i64, b: i64, c: i64, d: i64) -> __m256i { mem::transmute(i64x4::new(a, b, c, d)) } -/// Broadcast double-precision (64-bit) floating-point value `a` to all +/// Broadcasts double-precision (64-bit) floating-point value `a` to all /// elements of returned vector. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_set1_pd) @@ -2679,7 +2677,7 @@ pub unsafe fn _mm256_set1_pd(a: f64) -> __m256d { _mm256_setr_pd(a, a, a, a) } -/// Broadcast single-precision (32-bit) floating-point value `a` to all +/// Broadcasts single-precision (32-bit) floating-point value `a` to all /// elements of returned vector. 
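// A minimal usage sketch, assuming an AVX-capable x86_64 target: `_mm256_set_pd`
// takes its arguments from the highest lane down, while `_mm256_setr_pd` takes
// them in lane order, so both calls below build the vector [0.0, 1.0, 2.0, 3.0].
#[cfg(all(target_arch = "x86_64", target_feature = "avx"))]
unsafe fn equivalent_constructors() -> (std::arch::x86_64::__m256d, std::arch::x86_64::__m256d) {
    use std::arch::x86_64::{_mm256_set_pd, _mm256_setr_pd};
    (_mm256_set_pd(3.0, 2.0, 1.0, 0.0), _mm256_setr_pd(0.0, 1.0, 2.0, 3.0))
}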
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_set1_ps) @@ -2691,7 +2689,7 @@ pub unsafe fn _mm256_set1_ps(a: f32) -> __m256 { _mm256_setr_ps(a, a, a, a, a, a, a, a) } -/// Broadcast 8-bit integer `a` to all elements of returned vector. +/// Broadcasts 8-bit integer `a` to all elements of returned vector. /// This intrinsic may generate the `vpbroadcastb`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_set1_epi8) @@ -2711,7 +2709,7 @@ pub unsafe fn _mm256_set1_epi8(a: i8) -> __m256i { ) } -/// Broadcast 16-bit integer `a` to all all elements of returned vector. +/// Broadcasts 16-bit integer `a` to all elements of returned vector. /// This intrinsic may generate the `vpbroadcastw`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_set1_epi16) @@ -2725,7 +2723,7 @@ pub unsafe fn _mm256_set1_epi16(a: i16) -> __m256i { _mm256_setr_epi16(a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a) } -/// Broadcast 32-bit integer `a` to all elements of returned vector. +/// Broadcasts 32-bit integer `a` to all elements of returned vector. /// This intrinsic may generate the `vpbroadcastd`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_set1_epi32) @@ -2737,7 +2735,7 @@ pub unsafe fn _mm256_set1_epi32(a: i32) -> __m256i { _mm256_setr_epi32(a, a, a, a, a, a, a, a) } -/// Broadcast 64-bit integer `a` to all elements of returned vector. +/// Broadcasts 64-bit integer `a` to all elements of returned vector. /// This intrinsic may generate the `vpbroadcastq`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_set1_epi64x) @@ -2950,7 +2948,7 @@ pub unsafe fn _mm256_zextpd128_pd256(a: __m128d) -> __m256d { simd_shuffle4(a, _mm_setzero_pd(), [0, 1, 2, 3]) } -/// Return vector of type `__m256` with undefined elements. +/// Returns vector of type `__m256` with undefined elements. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_undefined_ps) #[inline] @@ -2962,7 +2960,7 @@ pub unsafe fn _mm256_undefined_ps() -> __m256 { mem::MaybeUninit::<__m256>::uninitialized().into_initialized() } -/// Return vector of type `__m256d` with undefined elements. +/// Returns vector of type `__m256d` with undefined elements. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_undefined_pd) #[inline] @@ -2974,7 +2972,7 @@ pub unsafe fn _mm256_undefined_pd() -> __m256d { mem::MaybeUninit::<__m256d>::uninitialized().into_initialized() } -/// Return vector of type __m256i with undefined elements. +/// Returns vector of type __m256i with undefined elements. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_undefined_si256) #[inline] @@ -2986,7 +2984,7 @@ pub unsafe fn _mm256_undefined_si256() -> __m256i { mem::MaybeUninit::<__m256i>::uninitialized().into_initialized() } -/// Set packed __m256 returned vector with the supplied values. +/// Sets packed __m256 returned vector with the supplied values.
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_set_m128) #[inline] @@ -2997,7 +2995,7 @@ pub unsafe fn _mm256_set_m128(hi: __m128, lo: __m128) -> __m256 { simd_shuffle8(lo, hi, [0, 1, 2, 3, 4, 5, 6, 7]) } -/// Set packed __m256d returned vector with the supplied values. +/// Sets packed __m256d returned vector with the supplied values. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_set_m128d) #[inline] @@ -3010,7 +3008,7 @@ pub unsafe fn _mm256_set_m128d(hi: __m128d, lo: __m128d) -> __m256d { mem::transmute(_mm256_set_m128(hi, lo)) } -/// Set packed __m256i returned vector with the supplied values. +/// Sets packed __m256i returned vector with the supplied values. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_set_m128i) #[inline] @@ -3023,7 +3021,7 @@ pub unsafe fn _mm256_set_m128i(hi: __m128i, lo: __m128i) -> __m256i { mem::transmute(_mm256_set_m128(hi, lo)) } -/// Set packed __m256 returned vector with the supplied values. +/// Sets packed __m256 returned vector with the supplied values. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_setr_m128) #[inline] @@ -3034,7 +3032,7 @@ pub unsafe fn _mm256_setr_m128(lo: __m128, hi: __m128) -> __m256 { _mm256_set_m128(hi, lo) } -/// Set packed __m256d returned vector with the supplied values. +/// Sets packed __m256d returned vector with the supplied values. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_setr_m128d) #[inline] @@ -3045,7 +3043,7 @@ pub unsafe fn _mm256_setr_m128d(lo: __m128d, hi: __m128d) -> __m256d { _mm256_set_m128d(hi, lo) } -/// Set packed __m256i returned vector with the supplied values. +/// Sets packed __m256i returned vector with the supplied values. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_setr_m128i) #[inline] @@ -3056,7 +3054,7 @@ pub unsafe fn _mm256_setr_m128i(lo: __m128i, hi: __m128i) -> __m256i { _mm256_set_m128i(hi, lo) } -/// Load two 128-bit values (composed of 4 packed single-precision (32-bit) +/// Loads two 128-bit values (composed of 4 packed single-precision (32-bit) /// floating-point elements) from memory, and combine them into a 256-bit /// value. /// `hiaddr` and `loaddr` do not need to be aligned on any particular boundary. @@ -3071,7 +3069,7 @@ pub unsafe fn _mm256_loadu2_m128(hiaddr: *const f32, loaddr: *const f32) -> __m2 _mm256_insertf128_ps(a, _mm_loadu_ps(hiaddr), 1) } -/// Load two 128-bit values (composed of 2 packed double-precision (64-bit) +/// Loads two 128-bit values (composed of 2 packed double-precision (64-bit) /// floating-point elements) from memory, and combine them into a 256-bit /// value. /// `hiaddr` and `loaddr` do not need to be aligned on any particular boundary. @@ -3086,7 +3084,7 @@ pub unsafe fn _mm256_loadu2_m128d(hiaddr: *const f64, loaddr: *const f64) -> __m _mm256_insertf128_pd(a, _mm_loadu_pd(hiaddr), 1) } -/// Load two 128-bit values (composed of integer data) from memory, and combine +/// Loads two 128-bit values (composed of integer data) from memory, and combine /// them into a 256-bit value. /// `hiaddr` and `loaddr` do not need to be aligned on any particular boundary. 
/// @@ -3100,7 +3098,7 @@ pub unsafe fn _mm256_loadu2_m128i(hiaddr: *const __m128i, loaddr: *const __m128i _mm256_insertf128_si256(a, _mm_loadu_si128(hiaddr), 1) } -/// Store the high and low 128-bit halves (each composed of 4 packed +/// Stores the high and low 128-bit halves (each composed of 4 packed /// single-precision (32-bit) floating-point elements) from `a` into memory two /// different 128-bit locations. /// `hiaddr` and `loaddr` do not need to be aligned on any particular boundary. @@ -3117,7 +3115,7 @@ pub unsafe fn _mm256_storeu2_m128(hiaddr: *mut f32, loaddr: *mut f32, a: __m256) _mm_storeu_ps(hiaddr, hi); } -/// Store the high and low 128-bit halves (each composed of 2 packed +/// Stores the high and low 128-bit halves (each composed of 2 packed /// double-precision (64-bit) floating-point elements) from `a` into memory two /// different 128-bit locations. /// `hiaddr` and `loaddr` do not need to be aligned on any particular boundary. @@ -3134,7 +3132,7 @@ pub unsafe fn _mm256_storeu2_m128d(hiaddr: *mut f64, loaddr: *mut f64, a: __m256 _mm_storeu_pd(hiaddr, hi); } -/// Store the high and low 128-bit halves (each composed of integer data) from +/// Stores the high and low 128-bit halves (each composed of integer data) from /// `a` into memory two different 128-bit locations. /// `hiaddr` and `loaddr` do not need to be aligned on any particular boundary. /// diff --git a/library/stdarch/crates/core_arch/src/x86/avx2.rs b/library/stdarch/crates/core_arch/src/x86/avx2.rs index 1043e96b95a..6a08d10a9ff 100644 --- a/library/stdarch/crates/core_arch/src/x86/avx2.rs +++ b/library/stdarch/crates/core_arch/src/x86/avx2.rs @@ -59,7 +59,7 @@ pub unsafe fn _mm256_abs_epi8(a: __m256i) -> __m256i { mem::transmute(pabsb(a.as_i8x32())) } -/// Add packed 64-bit integers in `a` and `b`. +/// Adds packed 64-bit integers in `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_add_epi64) #[inline] @@ -70,7 +70,7 @@ pub unsafe fn _mm256_add_epi64(a: __m256i, b: __m256i) -> __m256i { mem::transmute(simd_add(a.as_i64x4(), b.as_i64x4())) } -/// Add packed 32-bit integers in `a` and `b`. +/// Adds packed 32-bit integers in `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_add_epi32) #[inline] @@ -81,7 +81,7 @@ pub unsafe fn _mm256_add_epi32(a: __m256i, b: __m256i) -> __m256i { mem::transmute(simd_add(a.as_i32x8(), b.as_i32x8())) } -/// Add packed 16-bit integers in `a` and `b`. +/// Adds packed 16-bit integers in `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_add_epi16) #[inline] @@ -92,7 +92,7 @@ pub unsafe fn _mm256_add_epi16(a: __m256i, b: __m256i) -> __m256i { mem::transmute(simd_add(a.as_i16x16(), b.as_i16x16())) } -/// Add packed 8-bit integers in `a` and `b`. +/// Adds packed 8-bit integers in `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_add_epi8) #[inline] @@ -103,7 +103,7 @@ pub unsafe fn _mm256_add_epi8(a: __m256i, b: __m256i) -> __m256i { mem::transmute(simd_add(a.as_i8x32(), b.as_i8x32())) } -/// Add packed 8-bit integers in `a` and `b` using saturation. +/// Adds packed 8-bit integers in `a` and `b` using saturation. 
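// A minimal usage sketch, assuming an AVX2-capable x86_64 target: signed
// saturating addition with `_mm256_adds_epi8`; 100 + 100 saturates to the
// `i8` maximum of 127 in every lane instead of wrapping.
#[cfg(all(target_arch = "x86_64", target_feature = "avx2"))]
unsafe fn saturating_add_demo() -> std::arch::x86_64::__m256i {
    use std::arch::x86_64::{_mm256_adds_epi8, _mm256_set1_epi8};
    _mm256_adds_epi8(_mm256_set1_epi8(100), _mm256_set1_epi8(100)) // every lane is 127
}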
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_adds_epi8) #[inline] @@ -114,7 +114,7 @@ pub unsafe fn _mm256_adds_epi8(a: __m256i, b: __m256i) -> __m256i { mem::transmute(paddsb(a.as_i8x32(), b.as_i8x32())) } -/// Add packed 16-bit integers in `a` and `b` using saturation. +/// Adds packed 16-bit integers in `a` and `b` using saturation. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_adds_epi16) #[inline] @@ -125,7 +125,7 @@ pub unsafe fn _mm256_adds_epi16(a: __m256i, b: __m256i) -> __m256i { mem::transmute(paddsw(a.as_i16x16(), b.as_i16x16())) } -/// Add packed unsigned 8-bit integers in `a` and `b` using saturation. +/// Adds packed unsigned 8-bit integers in `a` and `b` using saturation. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_adds_epu8) #[inline] @@ -136,7 +136,7 @@ pub unsafe fn _mm256_adds_epu8(a: __m256i, b: __m256i) -> __m256i { mem::transmute(paddusb(a.as_u8x32(), b.as_u8x32())) } -/// Add packed unsigned 16-bit integers in `a` and `b` using saturation. +/// Adds packed unsigned 16-bit integers in `a` and `b` using saturation. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_adds_epu16) #[inline] @@ -147,8 +147,8 @@ pub unsafe fn _mm256_adds_epu16(a: __m256i, b: __m256i) -> __m256i { mem::transmute(paddusw(a.as_u16x16(), b.as_u16x16())) } -/// Concatenate pairs of 16-byte blocks in `a` and `b` into a 32-byte temporary -/// result, shift the result right by `n` bytes, and return the low 16 bytes. +/// Concatenates pairs of 16-byte blocks in `a` and `b` into a 32-byte temporary +/// result, shifts the result right by `n` bytes, and returns the low 16 bytes. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_alignr_epi8) #[inline] @@ -158,12 +158,12 @@ pub unsafe fn _mm256_adds_epu16(a: __m256i, b: __m256i) -> __m256i { #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_alignr_epi8(a: __m256i, b: __m256i, n: i32) -> __m256i { let n = n as u32; - // If palignr is shifting the pair of vectors more than the size of two + // If `palignr` is shifting the pair of vectors more than the size of two // lanes, emit zero. if n > 32 { return _mm256_set1_epi8(0); } - // If palignr is shifting the pair of input vectors more than one lane, + // If `palignr` is shifting the pair of input vectors more than one lane, // but less than two lanes, convert to shifting in zeroes. let (a, b, n) = if n > 16 { (_mm256_set1_epi8(0), a, n - 16) @@ -308,7 +308,7 @@ pub unsafe fn _mm256_alignr_epi8(a: __m256i, b: __m256i, n: i32) -> __m256i { mem::transmute(r) } -/// Compute the bitwise AND of 256 bits (representing integer data) +/// Computes the bitwise AND of 256 bits (representing integer data) /// in `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_and_si256) @@ -320,7 +320,7 @@ pub unsafe fn _mm256_and_si256(a: __m256i, b: __m256i) -> __m256i { mem::transmute(simd_and(a.as_i64x4(), b.as_i64x4())) } -/// Compute the bitwise NOT of 256 bits (representing integer data) +/// Computes the bitwise NOT of 256 bits (representing integer data) /// in `a` and then AND with `b`. 
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_andnot_si256) @@ -336,7 +336,7 @@ pub unsafe fn _mm256_andnot_si256(a: __m256i, b: __m256i) -> __m256i { )) } -/// Average packed unsigned 16-bit integers in `a` and `b`. +/// Averages packed unsigned 16-bit integers in `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_avg_epu16) #[inline] @@ -347,7 +347,7 @@ pub unsafe fn _mm256_avg_epu16(a: __m256i, b: __m256i) -> __m256i { mem::transmute(pavgw(a.as_u16x16(), b.as_u16x16())) } -/// Average packed unsigned 8-bit integers in `a` and `b`. +/// Averages packed unsigned 8-bit integers in `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_avg_epu8) #[inline] @@ -358,7 +358,7 @@ pub unsafe fn _mm256_avg_epu8(a: __m256i, b: __m256i) -> __m256i { mem::transmute(pavgb(a.as_u8x32(), b.as_u8x32())) } -/// Blend packed 32-bit integers from `a` and `b` using control mask `imm8`. +/// Blends packed 32-bit integers from `a` and `b` using control mask `imm8`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_blend_epi32) #[inline] @@ -394,7 +394,7 @@ pub unsafe fn _mm_blend_epi32(a: __m128i, b: __m128i, imm8: i32) -> __m128i { mem::transmute(r) } -/// Blend packed 32-bit integers from `a` and `b` using control mask `imm8`. +/// Blends packed 32-bit integers from `a` and `b` using control mask `imm8`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_blend_epi32) #[inline] @@ -459,7 +459,7 @@ pub unsafe fn _mm256_blend_epi32(a: __m256i, b: __m256i, imm8: i32) -> __m256i { mem::transmute(r) } -/// Blend packed 16-bit integers from `a` and `b` using control mask `imm8`. +/// Blends packed 16-bit integers from `a` and `b` using control mask `imm8`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_blend_epi16) #[inline] @@ -564,7 +564,7 @@ pub unsafe fn _mm256_blend_epi16(a: __m256i, b: __m256i, imm8: i32) -> __m256i { mem::transmute(r) } -/// Blend packed 8-bit integers from `a` and `b` using `mask`. +/// Blends packed 8-bit integers from `a` and `b` using `mask`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_blendv_epi8) #[inline] @@ -575,7 +575,7 @@ pub unsafe fn _mm256_blendv_epi8(a: __m256i, b: __m256i, mask: __m256i) -> __m25 mem::transmute(pblendvb(a.as_i8x32(), b.as_i8x32(), mask.as_i8x32())) } -/// Broadcast the low packed 8-bit integer from `a` to all elements of +/// Broadcasts the low packed 8-bit integer from `a` to all elements of /// the 128-bit returned value. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_broadcastb_epi8) @@ -589,7 +589,7 @@ pub unsafe fn _mm_broadcastb_epi8(a: __m128i) -> __m128i { mem::transmute::(ret) } -/// Broadcast the low packed 8-bit integer from `a` to all elements of +/// Broadcasts the low packed 8-bit integer from `a` to all elements of /// the 256-bit returned value. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_broadcastb_epi8) @@ -603,9 +603,9 @@ pub unsafe fn _mm256_broadcastb_epi8(a: __m128i) -> __m256i { mem::transmute::(ret) } -// NB: simd_shuffle4 with integer data types for `a` and `b` is -// often compiled to vbroadcastss. 
-/// Broadcast the low packed 32-bit integer from `a` to all elements of +// N.B., `simd_shuffle4` with integer data types for `a` and `b` is +// often compiled to `vbroadcastss`. +/// Broadcasts the low packed 32-bit integer from `a` to all elements of /// the 128-bit returned value. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_broadcastd_epi32) @@ -619,9 +619,9 @@ pub unsafe fn _mm_broadcastd_epi32(a: __m128i) -> __m128i { mem::transmute::(ret) } -// NB: simd_shuffle4 with integer data types for `a` and `b` is -// often compiled to vbroadcastss. -/// Broadcast the low packed 32-bit integer from `a` to all elements of +// N.B., `simd_shuffle4`` with integer data types for `a` and `b` is +// often compiled to `vbroadcastss`. +/// Broadcasts the low packed 32-bit integer from `a` to all elements of /// the 256-bit returned value. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_broadcastd_epi32) @@ -635,7 +635,7 @@ pub unsafe fn _mm256_broadcastd_epi32(a: __m128i) -> __m256i { mem::transmute::(ret) } -/// Broadcast the low packed 64-bit integer from `a` to all elements of +/// Broadcasts the low packed 64-bit integer from `a` to all elements of /// the 128-bit returned value. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_broadcastq_epi64) @@ -649,9 +649,9 @@ pub unsafe fn _mm_broadcastq_epi64(a: __m128i) -> __m128i { mem::transmute::(ret) } -// NB: simd_shuffle4 with integer data types for `a` and `b` is -// often compiled to vbroadcastsd. -/// Broadcast the low packed 64-bit integer from `a` to all elements of +// N.B. `simd_shuffle4` with integer data types for `a` and `b` is +// often compiled to `vbroadcastsd`. +/// Broadcasts the low packed 64-bit integer from `a` to all elements of /// the 256-bit returned value. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_broadcastq_epi64) @@ -665,7 +665,7 @@ pub unsafe fn _mm256_broadcastq_epi64(a: __m128i) -> __m256i { mem::transmute::(ret) } -/// Broadcast the low double-precision (64-bit) floating-point element +/// Broadcasts the low double-precision (64-bit) floating-point element /// from `a` to all elements of the 128-bit returned value. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_broadcastsd_pd) @@ -677,7 +677,7 @@ pub unsafe fn _mm_broadcastsd_pd(a: __m128d) -> __m128d { simd_shuffle2(a, _mm_setzero_pd(), [0_u32; 2]) } -/// Broadcast the low double-precision (64-bit) floating-point element +/// Broadcasts the low double-precision (64-bit) floating-point element /// from `a` to all elements of the 256-bit returned value. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_broadcastsd_pd) @@ -689,9 +689,9 @@ pub unsafe fn _mm256_broadcastsd_pd(a: __m128d) -> __m256d { simd_shuffle4(a, _mm_setzero_pd(), [0_u32; 4]) } -// NB: broadcastsi128_si256 is often compiled to vinsertf128 or -// vbroadcastf128. -/// Broadcast 128 bits of integer data from a to all 128-bit lanes in +// N.B., `broadcastsi128_si256` is often compiled to `vinsertf128` or +// `vbroadcastf128`. +/// Broadcasts 128 bits of integer data from a to all 128-bit lanes in /// the 256-bit returned value. 
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_broadcastsi128_si256) @@ -704,7 +704,7 @@ pub unsafe fn _mm256_broadcastsi128_si256(a: __m128i) -> __m256i { mem::transmute::(ret) } -/// Broadcast the low single-precision (32-bit) floating-point element +/// Broadcasts the low single-precision (32-bit) floating-point element /// from `a` to all elements of the 128-bit returned value. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_broadcastss_ps) @@ -716,7 +716,7 @@ pub unsafe fn _mm_broadcastss_ps(a: __m128) -> __m128 { simd_shuffle4(a, _mm_setzero_ps(), [0_u32; 4]) } -/// Broadcast the low single-precision (32-bit) floating-point element +/// Broadcasts the low single-precision (32-bit) floating-point element /// from `a` to all elements of the 256-bit returned value. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_broadcastss_ps) @@ -728,7 +728,7 @@ pub unsafe fn _mm256_broadcastss_ps(a: __m128) -> __m256 { simd_shuffle8(a, _mm_setzero_ps(), [0_u32; 8]) } -/// Broadcast the low packed 16-bit integer from a to all elements of +/// Broadcasts the low packed 16-bit integer from a to all elements of /// the 128-bit returned value /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_broadcastw_epi16) @@ -742,7 +742,7 @@ pub unsafe fn _mm_broadcastw_epi16(a: __m128i) -> __m128i { mem::transmute::(ret) } -/// Broadcast the low packed 16-bit integer from a to all elements of +/// Broadcasts the low packed 16-bit integer from a to all elements of /// the 256-bit returned value /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_broadcastw_epi16) @@ -756,7 +756,7 @@ pub unsafe fn _mm256_broadcastw_epi16(a: __m128i) -> __m256i { mem::transmute::(ret) } -/// Compare packed 64-bit integers in `a` and `b` for equality. +/// Compares packed 64-bit integers in `a` and `b` for equality. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpeq_epi64) #[inline] @@ -767,7 +767,7 @@ pub unsafe fn _mm256_cmpeq_epi64(a: __m256i, b: __m256i) -> __m256i { mem::transmute::(simd_eq(a.as_i64x4(), b.as_i64x4())) } -/// Compare packed 32-bit integers in `a` and `b` for equality. +/// Compares packed 32-bit integers in `a` and `b` for equality. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpeq_epi32) #[inline] @@ -778,7 +778,7 @@ pub unsafe fn _mm256_cmpeq_epi32(a: __m256i, b: __m256i) -> __m256i { mem::transmute::(simd_eq(a.as_i32x8(), b.as_i32x8())) } -/// Compare packed 16-bit integers in `a` and `b` for equality. +/// Compares packed 16-bit integers in `a` and `b` for equality. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpeq_epi16) #[inline] @@ -789,7 +789,7 @@ pub unsafe fn _mm256_cmpeq_epi16(a: __m256i, b: __m256i) -> __m256i { mem::transmute::(simd_eq(a.as_i16x16(), b.as_i16x16())) } -/// Compare packed 8-bit integers in `a` and `b` for equality. +/// Compares packed 8-bit integers in `a` and `b` for equality. 
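For the `_mm256_cmpeq_*` family above, equal lanes become all-ones and unequal lanes become zero; a short sketch with `_mm256_cmpeq_epi32` (values and the runtime check are illustrative, not from the patch):

```rust
use std::arch::x86_64::*;

fn main() {
    if is_x86_feature_detected!("avx2") {
        unsafe {
            let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
            let b = _mm256_setr_epi32(1, 0, 3, 0, 5, 0, 7, 0);
            // Equal lanes become all-ones (-1 as i32), unequal lanes become 0.
            let eq = _mm256_cmpeq_epi32(a, b);
            let mut out = [0i32; 8];
            _mm256_storeu_si256(out.as_mut_ptr() as *mut __m256i, eq);
            assert_eq!(out, [-1, 0, -1, 0, -1, 0, -1, 0]);
        }
    }
}
```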
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpeq_epi8) #[inline] @@ -800,7 +800,7 @@ pub unsafe fn _mm256_cmpeq_epi8(a: __m256i, b: __m256i) -> __m256i { mem::transmute::(simd_eq(a.as_i8x32(), b.as_i8x32())) } -/// Compare packed 64-bit integers in `a` and `b` for greater-than. +/// Compares packed 64-bit integers in `a` and `b` for greater-than. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpgt_epi64) #[inline] @@ -811,7 +811,7 @@ pub unsafe fn _mm256_cmpgt_epi64(a: __m256i, b: __m256i) -> __m256i { mem::transmute::(simd_gt(a.as_i64x4(), b.as_i64x4())) } -/// Compare packed 32-bit integers in `a` and `b` for greater-than. +/// Compares packed 32-bit integers in `a` and `b` for greater-than. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpgt_epi32) #[inline] @@ -822,7 +822,7 @@ pub unsafe fn _mm256_cmpgt_epi32(a: __m256i, b: __m256i) -> __m256i { mem::transmute::(simd_gt(a.as_i32x8(), b.as_i32x8())) } -/// Compare packed 16-bit integers in `a` and `b` for greater-than. +/// Compares packed 16-bit integers in `a` and `b` for greater-than. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpgt_epi16) #[inline] @@ -833,7 +833,7 @@ pub unsafe fn _mm256_cmpgt_epi16(a: __m256i, b: __m256i) -> __m256i { mem::transmute::(simd_gt(a.as_i16x16(), b.as_i16x16())) } -/// Compare packed 8-bit integers in `a` and `b` for greater-than. +/// Compares packed 8-bit integers in `a` and `b` for greater-than. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpgt_epi8) #[inline] @@ -916,8 +916,8 @@ pub unsafe fn _mm256_cvtepi8_epi64(a: __m128i) -> __m256i { mem::transmute::(simd_cast(v32)) } -/// Zero extend packed unsigned 16-bit integers in `a` to packed 32-bit -/// integers, and store the results in dst. +/// Zeroes extend packed unsigned 16-bit integers in `a` to packed 32-bit +/// integers, and stores the results in `dst`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepu16_epi32) #[inline] @@ -992,7 +992,7 @@ pub unsafe fn _mm256_cvtepu8_epi64(a: __m128i) -> __m256i { mem::transmute::(simd_cast(v32)) } -/// Extract 128 bits (of integer data) from `a` selected with `imm8`. +/// Extracts 128 bits (of integer data) from `a` selected with `imm8`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_extracti128_si256) #[inline] @@ -1013,7 +1013,7 @@ pub unsafe fn _mm256_extracti128_si256(a: __m256i, imm8: i32) -> __m128i { mem::transmute(dst) } -/// Horizontally add adjacent pairs of 16-bit integers in `a` and `b`. +/// Horizontally adds adjacent pairs of 16-bit integers in `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_hadd_epi16) #[inline] @@ -1024,7 +1024,7 @@ pub unsafe fn _mm256_hadd_epi16(a: __m256i, b: __m256i) -> __m256i { mem::transmute(phaddw(a.as_i16x16(), b.as_i16x16())) } -/// Horizontally add adjacent pairs of 32-bit integers in `a` and `b`. +/// Horizontally adds adjacent pairs of 32-bit integers in `a` and `b`. 
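The horizontal adds above work per 128-bit lane; a sketch of `_mm256_hadd_epi32` showing the resulting element order (illustrative values; not part of the patch):

```rust
use std::arch::x86_64::*;

fn main() {
    if is_x86_feature_detected!("avx2") {
        unsafe {
            let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
            let b = _mm256_setr_epi32(10, 20, 30, 40, 50, 60, 70, 80);
            // Adjacent pairs are summed within each 128-bit lane:
            // low lane  -> a0+a1, a2+a3, b0+b1, b2+b3
            // high lane -> a4+a5, a6+a7, b4+b5, b6+b7
            let r = _mm256_hadd_epi32(a, b);
            let mut out = [0i32; 8];
            _mm256_storeu_si256(out.as_mut_ptr() as *mut __m256i, r);
            assert_eq!(out, [3, 7, 30, 70, 11, 15, 110, 150]);
        }
    }
}
```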
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_hadd_epi32) #[inline] @@ -1035,7 +1035,7 @@ pub unsafe fn _mm256_hadd_epi32(a: __m256i, b: __m256i) -> __m256i { mem::transmute(phaddd(a.as_i32x8(), b.as_i32x8())) } -/// Horizontally add adjacent pairs of 16-bit integers in `a` and `b` +/// Horizontally adds adjacent pairs of 16-bit integers in `a` and `b` /// using saturation. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_hadds_epi16) @@ -1081,7 +1081,7 @@ pub unsafe fn _mm256_hsubs_epi16(a: __m256i, b: __m256i) -> __m256i { mem::transmute(phsubsw(a.as_i16x16(), b.as_i16x16())) } -/// Return values from `slice` at offsets determined by `offsets * scale`, +/// Returns values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. /// @@ -1105,7 +1105,7 @@ pub unsafe fn _mm_i32gather_epi32(slice: *const i32, offsets: __m128i, scale: i3 mem::transmute(r) } -/// Return values from `slice` at offsets determined by `offsets * scale`, +/// Returns values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. If mask is set, load the value from `src` in /// that position instead. @@ -1136,7 +1136,7 @@ pub unsafe fn _mm_mask_i32gather_epi32( mem::transmute(r) } -/// Return values from `slice` at offsets determined by `offsets * scale`, +/// Returns values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. /// @@ -1160,7 +1160,7 @@ pub unsafe fn _mm256_i32gather_epi32(slice: *const i32, offsets: __m256i, scale: mem::transmute(r) } -/// Return values from `slice` at offsets determined by `offsets * scale`, +/// Returns values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. If mask is set, load the value from `src` in /// that position instead. @@ -1191,7 +1191,7 @@ pub unsafe fn _mm256_mask_i32gather_epi32( mem::transmute(r) } -/// Return values from `slice` at offsets determined by `offsets * scale`, +/// Returns values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. /// @@ -1214,7 +1214,7 @@ pub unsafe fn _mm_i32gather_ps(slice: *const f32, offsets: __m128i, scale: i32) constify_imm8!(scale, call) } -/// Return values from `slice` at offsets determined by `offsets * scale`, +/// Returns values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. If mask is set, load the value from `src` in /// that position instead. @@ -1242,7 +1242,7 @@ pub unsafe fn _mm_mask_i32gather_ps( constify_imm8!(scale, call) } -/// Return values from `slice` at offsets determined by `offsets * scale`, +/// Returns values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. /// @@ -1265,7 +1265,7 @@ pub unsafe fn _mm256_i32gather_ps(slice: *const f32, offsets: __m256i, scale: i3 constify_imm8!(scale, call) } -/// Return values from `slice` at offsets determined by `offsets * scale`, +/// Returns values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. If mask is set, load the value from `src` in /// that position instead. 
@@ -1293,7 +1293,7 @@ pub unsafe fn _mm256_mask_i32gather_ps( constify_imm8!(scale, call) } -/// Return values from `slice` at offsets determined by `offsets * scale`, +/// Returns values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. /// @@ -1317,7 +1317,7 @@ pub unsafe fn _mm_i32gather_epi64(slice: *const i64, offsets: __m128i, scale: i3 mem::transmute(r) } -/// Return values from `slice` at offsets determined by `offsets * scale`, +/// Returns values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. If mask is set, load the value from `src` in /// that position instead. @@ -1348,7 +1348,7 @@ pub unsafe fn _mm_mask_i32gather_epi64( mem::transmute(r) } -/// Return values from `slice` at offsets determined by `offsets * scale`, +/// Returns values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. /// @@ -1372,7 +1372,7 @@ pub unsafe fn _mm256_i32gather_epi64(slice: *const i64, offsets: __m128i, scale: mem::transmute(r) } -/// Return values from `slice` at offsets determined by `offsets * scale`, +/// Returns values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. If mask is set, load the value from `src` in /// that position instead. @@ -1403,7 +1403,7 @@ pub unsafe fn _mm256_mask_i32gather_epi64( mem::transmute(r) } -/// Return values from `slice` at offsets determined by `offsets * scale`, +/// Returns values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. /// @@ -1426,7 +1426,7 @@ pub unsafe fn _mm_i32gather_pd(slice: *const f64, offsets: __m128i, scale: i32) constify_imm8!(scale, call) } -/// Return values from `slice` at offsets determined by `offsets * scale`, +/// Returns values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. If mask is set, load the value from `src` in /// that position instead. @@ -1454,7 +1454,7 @@ pub unsafe fn _mm_mask_i32gather_pd( constify_imm8!(scale, call) } -/// Return values from `slice` at offsets determined by `offsets * scale`, +/// Returns values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. /// @@ -1477,7 +1477,7 @@ pub unsafe fn _mm256_i32gather_pd(slice: *const f64, offsets: __m128i, scale: i3 constify_imm8!(scale, call) } -/// Return values from `slice` at offsets determined by `offsets * scale`, +/// Returns values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. If mask is set, load the value from `src` in /// that position instead. @@ -1505,7 +1505,7 @@ pub unsafe fn _mm256_mask_i32gather_pd( constify_imm8!(scale, call) } -/// Return values from `slice` at offsets determined by `offsets * scale`, +/// Returns values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. /// @@ -1529,7 +1529,7 @@ pub unsafe fn _mm_i64gather_epi32(slice: *const i32, offsets: __m128i, scale: i3 mem::transmute(r) } -/// Return values from `slice` at offsets determined by `offsets * scale`, +/// Returns values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. If mask is set, load the value from `src` in /// that position instead. 
@@ -1560,7 +1560,7 @@ pub unsafe fn _mm_mask_i64gather_epi32( mem::transmute(r) } -/// Return values from `slice` at offsets determined by `offsets * scale`, +/// Returns values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. /// @@ -1584,7 +1584,7 @@ pub unsafe fn _mm256_i64gather_epi32(slice: *const i32, offsets: __m256i, scale: mem::transmute(r) } -/// Return values from `slice` at offsets determined by `offsets * scale`, +/// Returns values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. If mask is set, load the value from `src` in /// that position instead. @@ -1615,7 +1615,7 @@ pub unsafe fn _mm256_mask_i64gather_epi32( mem::transmute(r) } -/// Return values from `slice` at offsets determined by `offsets * scale`, +/// Returns values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. /// @@ -1638,7 +1638,7 @@ pub unsafe fn _mm_i64gather_ps(slice: *const f32, offsets: __m128i, scale: i32) constify_imm8!(scale, call) } -/// Return values from `slice` at offsets determined by `offsets * scale`, +/// Returns values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. If mask is set, load the value from `src` in /// that position instead. @@ -1666,7 +1666,7 @@ pub unsafe fn _mm_mask_i64gather_ps( constify_imm8!(scale, call) } -/// Return values from `slice` at offsets determined by `offsets * scale`, +/// Returns values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. /// @@ -1689,7 +1689,7 @@ pub unsafe fn _mm256_i64gather_ps(slice: *const f32, offsets: __m256i, scale: i3 constify_imm8!(scale, call) } -/// Return values from `slice` at offsets determined by `offsets * scale`, +/// Returns values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. If mask is set, load the value from `src` in /// that position instead. @@ -1717,7 +1717,7 @@ pub unsafe fn _mm256_mask_i64gather_ps( constify_imm8!(scale, call) } -/// Return values from `slice` at offsets determined by `offsets * scale`, +/// Returns values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. /// @@ -1741,7 +1741,7 @@ pub unsafe fn _mm_i64gather_epi64(slice: *const i64, offsets: __m128i, scale: i3 mem::transmute(r) } -/// Return values from `slice` at offsets determined by `offsets * scale`, +/// Returns values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. If mask is set, load the value from `src` in /// that position instead. @@ -1772,7 +1772,7 @@ pub unsafe fn _mm_mask_i64gather_epi64( mem::transmute(r) } -/// Return values from `slice` at offsets determined by `offsets * scale`, +/// Returns values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. /// @@ -1796,7 +1796,7 @@ pub unsafe fn _mm256_i64gather_epi64(slice: *const i64, offsets: __m256i, scale: mem::transmute(r) } -/// Return values from `slice` at offsets determined by `offsets * scale`, +/// Returns values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. If mask is set, load the value from `src` in /// that position instead. 
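All of the gather intrinsics above follow the same pattern: a base pointer, packed offsets, and a byte `scale`. A hedged sketch using `_mm_i32gather_epi32` with the three-argument signature shown in this patch (more recent `std::arch` versions take the scale as a const generic instead; values and the wrapper are illustrative):

```rust
use std::arch::x86_64::*;

fn main() {
    if is_x86_feature_detected!("avx2") {
        unsafe {
            let data: [i32; 8] = [10, 11, 12, 13, 14, 15, 16, 17];
            // With scale = 4 (the size of an i32), the offsets act as
            // element indices into `data`.
            let offsets = _mm_setr_epi32(0, 2, 4, 6);
            let r = _mm_i32gather_epi32(data.as_ptr(), offsets, 4);
            let mut out = [0i32; 4];
            _mm_storeu_si128(out.as_mut_ptr() as *mut __m128i, r);
            assert_eq!(out, [10, 12, 14, 16]);
        }
    }
}
```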
@@ -1827,7 +1827,7 @@ pub unsafe fn _mm256_mask_i64gather_epi64( mem::transmute(r) } -/// Return values from `slice` at offsets determined by `offsets * scale`, +/// Returns values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. /// @@ -1850,7 +1850,7 @@ pub unsafe fn _mm_i64gather_pd(slice: *const f64, offsets: __m128i, scale: i32) constify_imm8!(scale, call) } -/// Return values from `slice` at offsets determined by `offsets * scale`, +/// Returns values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. If mask is set, load the value from `src` in /// that position instead. @@ -1878,7 +1878,7 @@ pub unsafe fn _mm_mask_i64gather_pd( constify_imm8!(scale, call) } -/// Return values from `slice` at offsets determined by `offsets * scale`, +/// Returns values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. /// @@ -1901,7 +1901,7 @@ pub unsafe fn _mm256_i64gather_pd(slice: *const f64, offsets: __m256i, scale: i3 constify_imm8!(scale, call) } -/// Return values from `slice` at offsets determined by `offsets * scale`, +/// Returns values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. If mask is set, load the value from `src` in /// that position instead. @@ -1929,7 +1929,7 @@ pub unsafe fn _mm256_mask_i64gather_pd( constify_imm8!(scale, call) } -/// Copy `a` to `dst`, then insert 128 bits (of integer data) from `b` at the +/// Copies `a` to `dst`, then insert 128 bits (of integer data) from `b` at the /// location specified by `imm8`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_inserti128_si256) @@ -1951,7 +1951,7 @@ pub unsafe fn _mm256_inserti128_si256(a: __m256i, b: __m128i, imm8: i32) -> __m2 mem::transmute(dst) } -/// Multiply packed signed 16-bit integers in `a` and `b`, producing +/// Multiplies packed signed 16-bit integers in `a` and `b`, producing /// intermediate signed 32-bit integers. Horizontally add adjacent pairs /// of intermediate 32-bit integers. /// @@ -1964,7 +1964,7 @@ pub unsafe fn _mm256_madd_epi16(a: __m256i, b: __m256i) -> __m256i { mem::transmute(pmaddwd(a.as_i16x16(), b.as_i16x16())) } -/// Vertically multiply each unsigned 8-bit integer from `a` with the +/// Vertically multiplies each unsigned 8-bit integer from `a` with the /// corresponding signed 8-bit integer from `b`, producing intermediate /// signed 16-bit integers. Horizontally add adjacent pairs of intermediate /// signed 16-bit integers @@ -1978,7 +1978,7 @@ pub unsafe fn _mm256_maddubs_epi16(a: __m256i, b: __m256i) -> __m256i { mem::transmute(pmaddubsw(a.as_u8x32(), b.as_u8x32())) } -/// Load packed 32-bit integers from memory pointed by `mem_addr` using `mask` +/// Loads packed 32-bit integers from memory pointed by `mem_addr` using `mask` /// (elements are zeroed out when the highest bit is not set in the /// corresponding element). /// @@ -1991,7 +1991,7 @@ pub unsafe fn _mm_maskload_epi32(mem_addr: *const i32, mask: __m128i) -> __m128i mem::transmute(maskloadd(mem_addr as *const i8, mask.as_i32x4())) } -/// Load packed 32-bit integers from memory pointed by `mem_addr` using `mask` +/// Loads packed 32-bit integers from memory pointed by `mem_addr` using `mask` /// (elements are zeroed out when the highest bit is not set in the /// corresponding element). 
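A small sketch of the masked-load behavior described above for `_mm_maskload_epi32`: lanes whose mask element has its high bit set are loaded, the rest are zeroed (values and the runtime check are illustrative):

```rust
use std::arch::x86_64::*;

fn main() {
    if is_x86_feature_detected!("avx2") {
        unsafe {
            let data: [i32; 4] = [10, 20, 30, 40];
            // -1 has its high bit set (load); 0 does not (zero the lane).
            let mask = _mm_setr_epi32(-1, 0, -1, 0);
            let r = _mm_maskload_epi32(data.as_ptr(), mask);
            let mut out = [0i32; 4];
            _mm_storeu_si128(out.as_mut_ptr() as *mut __m128i, r);
            assert_eq!(out, [10, 0, 30, 0]);
        }
    }
}
```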
/// @@ -2004,7 +2004,7 @@ pub unsafe fn _mm256_maskload_epi32(mem_addr: *const i32, mask: __m256i) -> __m2 mem::transmute(maskloadd256(mem_addr as *const i8, mask.as_i32x8())) } -/// Load packed 64-bit integers from memory pointed by `mem_addr` using `mask` +/// Loads packed 64-bit integers from memory pointed by `mem_addr` using `mask` /// (elements are zeroed out when the highest bit is not set in the /// corresponding element). /// @@ -2017,7 +2017,7 @@ pub unsafe fn _mm_maskload_epi64(mem_addr: *const i64, mask: __m128i) -> __m128i mem::transmute(maskloadq(mem_addr as *const i8, mask.as_i64x2())) } -/// Load packed 64-bit integers from memory pointed by `mem_addr` using `mask` +/// Loads packed 64-bit integers from memory pointed by `mem_addr` using `mask` /// (elements are zeroed out when the highest bit is not set in the /// corresponding element). /// @@ -2030,7 +2030,7 @@ pub unsafe fn _mm256_maskload_epi64(mem_addr: *const i64, mask: __m256i) -> __m2 mem::transmute(maskloadq256(mem_addr as *const i8, mask.as_i64x4())) } -/// Store packed 32-bit integers from `a` into memory pointed by `mem_addr` +/// Stores packed 32-bit integers from `a` into memory pointed by `mem_addr` /// using `mask` (elements are not stored when the highest bit is not set /// in the corresponding element). /// @@ -2043,7 +2043,7 @@ pub unsafe fn _mm_maskstore_epi32(mem_addr: *mut i32, mask: __m128i, a: __m128i) maskstored(mem_addr as *mut i8, mask.as_i32x4(), a.as_i32x4()) } -/// Store packed 32-bit integers from `a` into memory pointed by `mem_addr` +/// Stores packed 32-bit integers from `a` into memory pointed by `mem_addr` /// using `mask` (elements are not stored when the highest bit is not set /// in the corresponding element). /// @@ -2056,7 +2056,7 @@ pub unsafe fn _mm256_maskstore_epi32(mem_addr: *mut i32, mask: __m256i, a: __m25 maskstored256(mem_addr as *mut i8, mask.as_i32x8(), a.as_i32x8()) } -/// Store packed 64-bit integers from `a` into memory pointed by `mem_addr` +/// Stores packed 64-bit integers from `a` into memory pointed by `mem_addr` /// using `mask` (elements are not stored when the highest bit is not set /// in the corresponding element). /// @@ -2069,7 +2069,7 @@ pub unsafe fn _mm_maskstore_epi64(mem_addr: *mut i64, mask: __m128i, a: __m128i) maskstoreq(mem_addr as *mut i8, mask.as_i64x2(), a.as_i64x2()) } -/// Store packed 64-bit integers from `a` into memory pointed by `mem_addr` +/// Stores packed 64-bit integers from `a` into memory pointed by `mem_addr` /// using `mask` (elements are not stored when the highest bit is not set /// in the corresponding element). /// @@ -2082,7 +2082,7 @@ pub unsafe fn _mm256_maskstore_epi64(mem_addr: *mut i64, mask: __m256i, a: __m25 maskstoreq256(mem_addr as *mut i8, mask.as_i64x4(), a.as_i64x4()) } -/// Compare packed 16-bit integers in `a` and `b`, and return the packed +/// Compares packed 16-bit integers in `a` and `b`, and returns the packed /// maximum values. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_max_epi16) @@ -2094,7 +2094,7 @@ pub unsafe fn _mm256_max_epi16(a: __m256i, b: __m256i) -> __m256i { mem::transmute(pmaxsw(a.as_i16x16(), b.as_i16x16())) } -/// Compare packed 32-bit integers in `a` and `b`, and return the packed +/// Compares packed 32-bit integers in `a` and `b`, and returns the packed /// maximum values. 
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_max_epi32) @@ -2106,7 +2106,7 @@ pub unsafe fn _mm256_max_epi32(a: __m256i, b: __m256i) -> __m256i { mem::transmute(pmaxsd(a.as_i32x8(), b.as_i32x8())) } -/// Compare packed 8-bit integers in `a` and `b`, and return the packed +/// Compares packed 8-bit integers in `a` and `b`, and returns the packed /// maximum values. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_max_epi8) @@ -2118,7 +2118,7 @@ pub unsafe fn _mm256_max_epi8(a: __m256i, b: __m256i) -> __m256i { mem::transmute(pmaxsb(a.as_i8x32(), b.as_i8x32())) } -/// Compare packed unsigned 16-bit integers in `a` and `b`, and return +/// Compares packed unsigned 16-bit integers in `a` and `b`, and returns /// the packed maximum values. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_max_epu16) @@ -2130,7 +2130,7 @@ pub unsafe fn _mm256_max_epu16(a: __m256i, b: __m256i) -> __m256i { mem::transmute(pmaxuw(a.as_u16x16(), b.as_u16x16())) } -/// Compare packed unsigned 32-bit integers in `a` and `b`, and return +/// Compares packed unsigned 32-bit integers in `a` and `b`, and returns /// the packed maximum values. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_max_epu32) @@ -2142,7 +2142,7 @@ pub unsafe fn _mm256_max_epu32(a: __m256i, b: __m256i) -> __m256i { mem::transmute(pmaxud(a.as_u32x8(), b.as_u32x8())) } -/// Compare packed unsigned 8-bit integers in `a` and `b`, and return +/// Compares packed unsigned 8-bit integers in `a` and `b`, and returns /// the packed maximum values. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_max_epu8) @@ -2154,7 +2154,7 @@ pub unsafe fn _mm256_max_epu8(a: __m256i, b: __m256i) -> __m256i { mem::transmute(pmaxub(a.as_u8x32(), b.as_u8x32())) } -/// Compare packed 16-bit integers in `a` and `b`, and return the packed +/// Compares packed 16-bit integers in `a` and `b`, and returns the packed /// minimum values. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_min_epi16) @@ -2166,7 +2166,7 @@ pub unsafe fn _mm256_min_epi16(a: __m256i, b: __m256i) -> __m256i { mem::transmute(pminsw(a.as_i16x16(), b.as_i16x16())) } -/// Compare packed 32-bit integers in `a` and `b`, and return the packed +/// Compares packed 32-bit integers in `a` and `b`, and returns the packed /// minimum values. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_min_epi32) @@ -2178,7 +2178,7 @@ pub unsafe fn _mm256_min_epi32(a: __m256i, b: __m256i) -> __m256i { mem::transmute(pminsd(a.as_i32x8(), b.as_i32x8())) } -/// Compare packed 8-bit integers in `a` and `b`, and return the packed +/// Compares packed 8-bit integers in `a` and `b`, and returns the packed /// minimum values. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_min_epi8) @@ -2190,7 +2190,7 @@ pub unsafe fn _mm256_min_epi8(a: __m256i, b: __m256i) -> __m256i { mem::transmute(pminsb(a.as_i8x32(), b.as_i8x32())) } -/// Compare packed unsigned 16-bit integers in `a` and `b`, and return +/// Compares packed unsigned 16-bit integers in `a` and `b`, and returns /// the packed minimum values. 
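For the packed minimum/maximum hunks above, a sketch contrasting `_mm256_max_epi32` and `_mm256_min_epi32` against zero (illustrative values, not from the patch):

```rust
use std::arch::x86_64::*;

fn main() {
    if is_x86_feature_detected!("avx2") {
        unsafe {
            let a = _mm256_setr_epi32(1, -2, 3, -4, 5, -6, 7, -8);
            let zero = _mm256_setzero_si256();
            // Lane-wise signed maximum and minimum.
            let hi = _mm256_max_epi32(a, zero);
            let lo = _mm256_min_epi32(a, zero);
            let (mut max_out, mut min_out) = ([0i32; 8], [0i32; 8]);
            _mm256_storeu_si256(max_out.as_mut_ptr() as *mut __m256i, hi);
            _mm256_storeu_si256(min_out.as_mut_ptr() as *mut __m256i, lo);
            assert_eq!(max_out, [1, 0, 3, 0, 5, 0, 7, 0]);
            assert_eq!(min_out, [0, -2, 0, -4, 0, -6, 0, -8]);
        }
    }
}
```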
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_min_epu16) @@ -2202,7 +2202,7 @@ pub unsafe fn _mm256_min_epu16(a: __m256i, b: __m256i) -> __m256i { mem::transmute(pminuw(a.as_u16x16(), b.as_u16x16())) } -/// Compare packed unsigned 32-bit integers in `a` and `b`, and return +/// Compares packed unsigned 32-bit integers in `a` and `b`, and returns /// the packed minimum values. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_min_epu32) @@ -2214,7 +2214,7 @@ pub unsafe fn _mm256_min_epu32(a: __m256i, b: __m256i) -> __m256i { mem::transmute(pminud(a.as_u32x8(), b.as_u32x8())) } -/// Compare packed unsigned 8-bit integers in `a` and `b`, and return +/// Compares packed unsigned 8-bit integers in `a` and `b`, and returns /// the packed minimum values. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_min_epu8) @@ -2226,7 +2226,7 @@ pub unsafe fn _mm256_min_epu8(a: __m256i, b: __m256i) -> __m256i { mem::transmute(pminub(a.as_u8x32(), b.as_u8x32())) } -/// Create mask from the most significant bit of each 8-bit element in `a`, +/// Creates mask from the most significant bit of each 8-bit element in `a`, /// return the result. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_movemask_epi8) @@ -2238,8 +2238,8 @@ pub unsafe fn _mm256_movemask_epi8(a: __m256i) -> i32 { pmovmskb(a.as_i8x32()) } -/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned -/// 8-bit integers in `a` compared to those in `b`, and store the 16-bit +/// Computes the sum of absolute differences (SADs) of quadruplets of unsigned +/// 8-bit integers in `a` compared to those in `b`, and stores the 16-bit /// results in dst. Eight SADs are performed for each 128-bit lane using one /// quadruplet from `b` and eight quadruplets from `a`. One quadruplet is /// selected from `b` starting at on the offset specified in `imm8`. Eight @@ -2264,10 +2264,10 @@ pub unsafe fn _mm256_mpsadbw_epu8(a: __m256i, b: __m256i, imm8: i32) -> __m256i mem::transmute(r) } -/// Multiply the low 32-bit integers from each packed 64-bit element in +/// Multiplies the low 32-bit integers from each packed 64-bit element in /// `a` and `b` /// -/// Return the 64-bit results. +/// Returns the 64-bit results. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mul_epi32) #[inline] @@ -2278,10 +2278,10 @@ pub unsafe fn _mm256_mul_epi32(a: __m256i, b: __m256i) -> __m256i { mem::transmute(pmuldq(a.as_i32x8(), b.as_i32x8())) } -/// Multiply the low unsigned 32-bit integers from each packed 64-bit +/// Multiplies the low unsigned 32-bit integers from each packed 64-bit /// element in `a` and `b` /// -/// Return the unsigned 64-bit results. +/// Returns the unsigned 64-bit results. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mul_epu32) #[inline] @@ -2292,7 +2292,7 @@ pub unsafe fn _mm256_mul_epu32(a: __m256i, b: __m256i) -> __m256i { mem::transmute(pmuludq(a.as_u32x8(), b.as_u32x8())) } -/// Multiply the packed 16-bit integers in `a` and `b`, producing +/// Multiplies the packed 16-bit integers in `a` and `b`, producing /// intermediate 32-bit integers and returning the high 16 bits of the /// intermediate integers. 
/// @@ -2305,7 +2305,7 @@ pub unsafe fn _mm256_mulhi_epi16(a: __m256i, b: __m256i) -> __m256i { mem::transmute(pmulhw(a.as_i16x16(), b.as_i16x16())) } -/// Multiply the packed unsigned 16-bit integers in `a` and `b`, producing +/// Multiplies the packed unsigned 16-bit integers in `a` and `b`, producing /// intermediate 32-bit integers and returning the high 16 bits of the /// intermediate integers. /// @@ -2318,8 +2318,8 @@ pub unsafe fn _mm256_mulhi_epu16(a: __m256i, b: __m256i) -> __m256i { mem::transmute(pmulhuw(a.as_u16x16(), b.as_u16x16())) } -/// Multiply the packed 16-bit integers in `a` and `b`, producing -/// intermediate 32-bit integers, and return the low 16 bits of the +/// Multiplies the packed 16-bit integers in `a` and `b`, producing +/// intermediate 32-bit integers, and returns the low 16 bits of the /// intermediate integers /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mullo_epi16) @@ -2331,8 +2331,8 @@ pub unsafe fn _mm256_mullo_epi16(a: __m256i, b: __m256i) -> __m256i { mem::transmute(simd_mul(a.as_i16x16(), b.as_i16x16())) } -/// Multiply the packed 32-bit integers in `a` and `b`, producing -/// intermediate 64-bit integers, and return the low 16 bits of the +/// Multiplies the packed 32-bit integers in `a` and `b`, producing +/// intermediate 64-bit integers, and returns the low 16 bits of the /// intermediate integers /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mullo_epi32) @@ -2344,7 +2344,7 @@ pub unsafe fn _mm256_mullo_epi32(a: __m256i, b: __m256i) -> __m256i { mem::transmute(simd_mul(a.as_i32x8(), b.as_i32x8())) } -/// Multiply packed 16-bit integers in `a` and `b`, producing +/// Multiplies packed 16-bit integers in `a` and `b`, producing /// intermediate signed 32-bit integers. Truncate each intermediate /// integer to the 18 most significant bits, round by adding 1, and /// return bits `[16:1]`. 
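To make the low/high split above concrete, a sketch contrasting `_mm256_mullo_epi16` and `_mm256_mulhi_epi16` on the same 32-bit product (illustrative values; not part of the patch):

```rust
use std::arch::x86_64::*;

fn main() {
    if is_x86_feature_detected!("avx2") {
        unsafe {
            // 1000 * 1000 = 1_000_000 = 0x000F_4240 as a 32-bit product.
            let a = _mm256_set1_epi16(1000);
            let lo = _mm256_mullo_epi16(a, a); // low 16 bits:  0x4240
            let hi = _mm256_mulhi_epi16(a, a); // high 16 bits: 0x000F
            let (mut lo_out, mut hi_out) = ([0i16; 16], [0i16; 16]);
            _mm256_storeu_si256(lo_out.as_mut_ptr() as *mut __m256i, lo);
            _mm256_storeu_si256(hi_out.as_mut_ptr() as *mut __m256i, hi);
            assert_eq!(lo_out, [0x4240; 16]);
            assert_eq!(hi_out, [0x000F; 16]);
        }
    }
}
```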
@@ -2358,7 +2358,7 @@ pub unsafe fn _mm256_mulhrs_epi16(a: __m256i, b: __m256i) -> __m256i { mem::transmute(pmulhrsw(a.as_i16x16(), b.as_i16x16())) } -/// Compute the bitwise OR of 256 bits (representing integer data) in `a` +/// Computes the bitwise OR of 256 bits (representing integer data) in `a` /// and `b` /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_or_si256) @@ -2370,7 +2370,7 @@ pub unsafe fn _mm256_or_si256(a: __m256i, b: __m256i) -> __m256i { mem::transmute(simd_or(a.as_i32x8(), b.as_i32x8())) } -/// Convert packed 16-bit integers from `a` and `b` to packed 8-bit integers +/// Converts packed 16-bit integers from `a` and `b` to packed 8-bit integers /// using signed saturation /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_packs_epi16) @@ -2382,7 +2382,7 @@ pub unsafe fn _mm256_packs_epi16(a: __m256i, b: __m256i) -> __m256i { mem::transmute(packsswb(a.as_i16x16(), b.as_i16x16())) } -/// Convert packed 32-bit integers from `a` and `b` to packed 16-bit integers +/// Converts packed 32-bit integers from `a` and `b` to packed 16-bit integers /// using signed saturation /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_packs_epi32) @@ -2394,7 +2394,7 @@ pub unsafe fn _mm256_packs_epi32(a: __m256i, b: __m256i) -> __m256i { mem::transmute(packssdw(a.as_i32x8(), b.as_i32x8())) } -/// Convert packed 16-bit integers from `a` and `b` to packed 8-bit integers +/// Converts packed 16-bit integers from `a` and `b` to packed 8-bit integers /// using unsigned saturation /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_packus_epi16) @@ -2406,7 +2406,7 @@ pub unsafe fn _mm256_packus_epi16(a: __m256i, b: __m256i) -> __m256i { mem::transmute(packuswb(a.as_i16x16(), b.as_i16x16())) } -/// Convert packed 32-bit integers from `a` and `b` to packed 16-bit integers +/// Converts packed 32-bit integers from `a` and `b` to packed 16-bit integers /// using unsigned saturation /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_packus_epi32) @@ -2488,7 +2488,7 @@ pub unsafe fn _mm256_permute4x64_epi64(a: __m256i, imm8: i32) -> __m256i { mem::transmute(r) } -/// Shuffle 128-bits of integer data selected by `imm8` from `a` and `b`. +/// Shuffles 128-bits of integer data selected by `imm8` from `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permute2x128_si256) #[inline] @@ -2507,7 +2507,7 @@ pub unsafe fn _mm256_permute2x128_si256(a: __m256i, b: __m256i, imm8: i32) -> __ mem::transmute(constify_imm8!(imm8, call)) } -/// Shuffle 64-bit floating-point elements in `a` across lanes using the +/// Shuffles 64-bit floating-point elements in `a` across lanes using the /// control in `imm8`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permute4x64_pd) @@ -2562,7 +2562,7 @@ pub unsafe fn _mm256_permute4x64_pd(a: __m256d, imm8: i32) -> __m256d { } } -/// Shuffle eight 32-bit foating-point elements in `a` across lanes using +/// Shuffles eight 32-bit foating-point elements in `a` across lanes using /// the corresponding 32-bit integer index in `idx`. 
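The cross-lane permute described above, `_mm256_permutevar8x32_ps`, picks each output element by the index stored in `idx`; a sketch that reverses a vector (illustrative values, not from the patch):

```rust
use std::arch::x86_64::*;

fn main() {
    if is_x86_feature_detected!("avx2") {
        unsafe {
            let a = _mm256_setr_ps(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
            // Reverse the eight elements: output[i] = a[idx[i]].
            let idx = _mm256_setr_epi32(7, 6, 5, 4, 3, 2, 1, 0);
            let r = _mm256_permutevar8x32_ps(a, idx);
            let mut out = [0.0f32; 8];
            _mm256_storeu_ps(out.as_mut_ptr(), r);
            assert_eq!(out, [7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, 0.0]);
        }
    }
}
```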
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permutevar8x32_ps) @@ -2574,7 +2574,7 @@ pub unsafe fn _mm256_permutevar8x32_ps(a: __m256, idx: __m256i) -> __m256 { permps(a, idx.as_i32x8()) } -/// Compute the absolute differences of packed unsigned 8-bit integers in `a` +/// Computes the absolute differences of packed unsigned 8-bit integers in `a` /// and `b`, then horizontally sum each consecutive 8 differences to /// produce four unsigned 16-bit integers, and pack these unsigned 16-bit /// integers in the low 16 bits of the 64-bit return value @@ -2588,7 +2588,7 @@ pub unsafe fn _mm256_sad_epu8(a: __m256i, b: __m256i) -> __m256i { mem::transmute(psadbw(a.as_u8x32(), b.as_u8x32())) } -/// Shuffle bytes from `a` according to the content of `b`. +/// Shuffles bytes from `a` according to the content of `b`. /// /// The last 4 bits of each byte of `b` are used as addresses into the 32 bytes /// of `a`. @@ -2627,7 +2627,7 @@ pub unsafe fn _mm256_shuffle_epi8(a: __m256i, b: __m256i) -> __m256i { mem::transmute(pshufb(a.as_u8x32(), b.as_u8x32())) } -/// Shuffle 32-bit integers in 128-bit lanes of `a` using the control in +/// Shuffles 32-bit integers in 128-bit lanes of `a` using the control in /// `imm8`. /// /// ```rust @@ -2730,7 +2730,7 @@ pub unsafe fn _mm256_shuffle_epi32(a: __m256i, imm8: i32) -> __m256i { mem::transmute(r) } -/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of `a` using +/// Shuffles 16-bit integers in the high 64 bits of 128-bit lanes of `a` using /// the control in `imm8`. The low 64 bits of 128-bit lanes of `a` are copied /// to the output. /// @@ -2791,7 +2791,7 @@ pub unsafe fn _mm256_shufflehi_epi16(a: __m256i, imm8: i32) -> __m256i { mem::transmute(r) } -/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of `a` using +/// Shuffles 16-bit integers in the low 64 bits of 128-bit lanes of `a` using /// the control in `imm8`. The high 64 bits of 128-bit lanes of `a` are copied /// to the output. /// @@ -2852,8 +2852,8 @@ pub unsafe fn _mm256_shufflelo_epi16(a: __m256i, imm8: i32) -> __m256i { mem::transmute(r) } -/// Negate packed 16-bit integers in `a` when the corresponding signed -/// 16-bit integer in `b` is negative, and return the results. +/// Negates packed 16-bit integers in `a` when the corresponding signed +/// 16-bit integer in `b` is negative, and returns the results. /// Results are zeroed out when the corresponding element in `b` is zero. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sign_epi16) @@ -2865,8 +2865,8 @@ pub unsafe fn _mm256_sign_epi16(a: __m256i, b: __m256i) -> __m256i { mem::transmute(psignw(a.as_i16x16(), b.as_i16x16())) } -/// Negate packed 32-bit integers in `a` when the corresponding signed -/// 32-bit integer in `b` is negative, and return the results. +/// Negates packed 32-bit integers in `a` when the corresponding signed +/// 32-bit integer in `b` is negative, and returns the results. /// Results are zeroed out when the corresponding element in `b` is zero. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sign_epi32) @@ -2878,8 +2878,8 @@ pub unsafe fn _mm256_sign_epi32(a: __m256i, b: __m256i) -> __m256i { mem::transmute(psignd(a.as_i32x8(), b.as_i32x8())) } -/// Negate packed 8-bit integers in `a` when the corresponding signed -/// 8-bit integer in `b` is negative, and return the results. 
+/// Negates packed 8-bit integers in `a` when the corresponding signed +/// 8-bit integer in `b` is negative, and returns the results. /// Results are zeroed out when the corresponding element in `b` is zero. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sign_epi8) @@ -2891,8 +2891,8 @@ pub unsafe fn _mm256_sign_epi8(a: __m256i, b: __m256i) -> __m256i { mem::transmute(psignb(a.as_i8x32(), b.as_i8x32())) } -/// Shift packed 16-bit integers in `a` left by `count` while -/// shifting in zeros, and return the result +/// Shifts packed 16-bit integers in `a` left by `count` while +/// shifting in zeros, and returns the result /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sll_epi16) #[inline] @@ -2903,8 +2903,8 @@ pub unsafe fn _mm256_sll_epi16(a: __m256i, count: __m128i) -> __m256i { mem::transmute(psllw(a.as_i16x16(), count.as_i16x8())) } -/// Shift packed 32-bit integers in `a` left by `count` while -/// shifting in zeros, and return the result +/// Shifts packed 32-bit integers in `a` left by `count` while +/// shifting in zeros, and returns the result /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sll_epi32) #[inline] @@ -2915,8 +2915,8 @@ pub unsafe fn _mm256_sll_epi32(a: __m256i, count: __m128i) -> __m256i { mem::transmute(pslld(a.as_i32x8(), count.as_i32x4())) } -/// Shift packed 64-bit integers in `a` left by `count` while -/// shifting in zeros, and return the result +/// Shifts packed 64-bit integers in `a` left by `count` while +/// shifting in zeros, and returns the result /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sll_epi64) #[inline] @@ -2927,7 +2927,7 @@ pub unsafe fn _mm256_sll_epi64(a: __m256i, count: __m128i) -> __m256i { mem::transmute(psllq(a.as_i64x4(), count.as_i64x2())) } -/// Shift packed 16-bit integers in `a` left by `imm8` while +/// Shifts packed 16-bit integers in `a` left by `imm8` while /// shifting in zeros, return the results; /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_slli_epi16) @@ -2939,7 +2939,7 @@ pub unsafe fn _mm256_slli_epi16(a: __m256i, imm8: i32) -> __m256i { mem::transmute(pslliw(a.as_i16x16(), imm8)) } -/// Shift packed 32-bit integers in `a` left by `imm8` while +/// Shifts packed 32-bit integers in `a` left by `imm8` while /// shifting in zeros, return the results; /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_slli_epi32) @@ -2951,7 +2951,7 @@ pub unsafe fn _mm256_slli_epi32(a: __m256i, imm8: i32) -> __m256i { mem::transmute(psllid(a.as_i32x8(), imm8)) } -/// Shift packed 64-bit integers in `a` left by `imm8` while +/// Shifts packed 64-bit integers in `a` left by `imm8` while /// shifting in zeros, return the results; /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_slli_epi64) @@ -2963,7 +2963,7 @@ pub unsafe fn _mm256_slli_epi64(a: __m256i, imm8: i32) -> __m256i { mem::transmute(pslliq(a.as_i64x4(), imm8)) } -/// Shift 128-bit lanes in `a` left by `imm8` bytes while shifting in zeros. +/// Shifts 128-bit lanes in `a` left by `imm8` bytes while shifting in zeros. 
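The `_mm256_sll_*` forms above take the shift count from the low 64 bits of a `__m128i` rather than from an immediate; a small sketch (values, wrapper, and runtime check are illustrative):

```rust
use std::arch::x86_64::*;

fn main() {
    if is_x86_feature_detected!("avx2") {
        unsafe {
            let a = _mm256_set1_epi32(3);
            // Shift every 32-bit lane left by 4 bits: 3 << 4 == 48.
            let count = _mm_cvtsi32_si128(4);
            let r = _mm256_sll_epi32(a, count);
            let mut out = [0i32; 8];
            _mm256_storeu_si256(out.as_mut_ptr() as *mut __m256i, r);
            assert_eq!(out, [48; 8]);
        }
    }
}
```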
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_slli_si256) #[inline] @@ -2981,7 +2981,7 @@ pub unsafe fn _mm256_slli_si256(a: __m256i, imm8: i32) -> __m256i { mem::transmute(constify_imm8!(imm8 * 8, call)) } -/// Shift 128-bit lanes in `a` left by `imm8` bytes while shifting in zeros. +/// Shifts 128-bit lanes in `a` left by `imm8` bytes while shifting in zeros. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_bslli_epi128) #[inline] @@ -2999,9 +2999,9 @@ pub unsafe fn _mm256_bslli_epi128(a: __m256i, imm8: i32) -> __m256i { mem::transmute(constify_imm8!(imm8 * 8, call)) } -/// Shift packed 32-bit integers in `a` left by the amount +/// Shifts packed 32-bit integers in `a` left by the amount /// specified by the corresponding element in `count` while -/// shifting in zeros, and return the result. +/// shifting in zeros, and returns the result. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sllv_epi32) #[inline] @@ -3012,9 +3012,9 @@ pub unsafe fn _mm_sllv_epi32(a: __m128i, count: __m128i) -> __m128i { mem::transmute(psllvd(a.as_i32x4(), count.as_i32x4())) } -/// Shift packed 32-bit integers in `a` left by the amount +/// Shifts packed 32-bit integers in `a` left by the amount /// specified by the corresponding element in `count` while -/// shifting in zeros, and return the result. +/// shifting in zeros, and returns the result. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sllv_epi32) #[inline] @@ -3025,9 +3025,9 @@ pub unsafe fn _mm256_sllv_epi32(a: __m256i, count: __m256i) -> __m256i { mem::transmute(psllvd256(a.as_i32x8(), count.as_i32x8())) } -/// Shift packed 64-bit integers in `a` left by the amount +/// Shifts packed 64-bit integers in `a` left by the amount /// specified by the corresponding element in `count` while -/// shifting in zeros, and return the result. +/// shifting in zeros, and returns the result. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sllv_epi64) #[inline] @@ -3038,9 +3038,9 @@ pub unsafe fn _mm_sllv_epi64(a: __m128i, count: __m128i) -> __m128i { mem::transmute(psllvq(a.as_i64x2(), count.as_i64x2())) } -/// Shift packed 64-bit integers in `a` left by the amount +/// Shifts packed 64-bit integers in `a` left by the amount /// specified by the corresponding element in `count` while -/// shifting in zeros, and return the result. +/// shifting in zeros, and returns the result. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sllv_epi64) #[inline] @@ -3051,7 +3051,7 @@ pub unsafe fn _mm256_sllv_epi64(a: __m256i, count: __m256i) -> __m256i { mem::transmute(psllvq256(a.as_i64x4(), count.as_i64x4())) } -/// Shift packed 16-bit integers in `a` right by `count` while +/// Shifts packed 16-bit integers in `a` right by `count` while /// shifting in sign bits. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sra_epi16) @@ -3063,7 +3063,7 @@ pub unsafe fn _mm256_sra_epi16(a: __m256i, count: __m128i) -> __m256i { mem::transmute(psraw(a.as_i16x16(), count.as_i16x8())) } -/// Shift packed 32-bit integers in `a` right by `count` while +/// Shifts packed 32-bit integers in `a` right by `count` while /// shifting in sign bits. 
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sra_epi32) @@ -3075,7 +3075,7 @@ pub unsafe fn _mm256_sra_epi32(a: __m256i, count: __m128i) -> __m256i { mem::transmute(psrad(a.as_i32x8(), count.as_i32x4())) } -/// Shift packed 16-bit integers in `a` right by `imm8` while +/// Shifts packed 16-bit integers in `a` right by `imm8` while /// shifting in sign bits. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_srai_epi16) @@ -3087,7 +3087,7 @@ pub unsafe fn _mm256_srai_epi16(a: __m256i, imm8: i32) -> __m256i { mem::transmute(psraiw(a.as_i16x16(), imm8)) } -/// Shift packed 32-bit integers in `a` right by `imm8` while +/// Shifts packed 32-bit integers in `a` right by `imm8` while /// shifting in sign bits. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_srai_epi32) @@ -3099,7 +3099,7 @@ pub unsafe fn _mm256_srai_epi32(a: __m256i, imm8: i32) -> __m256i { mem::transmute(psraid(a.as_i32x8(), imm8)) } -/// Shift packed 32-bit integers in `a` right by the amount specified by the +/// Shifts packed 32-bit integers in `a` right by the amount specified by the /// corresponding element in `count` while shifting in sign bits. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srav_epi32) @@ -3111,7 +3111,7 @@ pub unsafe fn _mm_srav_epi32(a: __m128i, count: __m128i) -> __m128i { mem::transmute(psravd(a.as_i32x4(), count.as_i32x4())) } -/// Shift packed 32-bit integers in `a` right by the amount specified by the +/// Shifts packed 32-bit integers in `a` right by the amount specified by the /// corresponding element in `count` while shifting in sign bits. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_srav_epi32) @@ -3123,7 +3123,7 @@ pub unsafe fn _mm256_srav_epi32(a: __m256i, count: __m256i) -> __m256i { mem::transmute(psravd256(a.as_i32x8(), count.as_i32x8())) } -/// Shift 128-bit lanes in `a` right by `imm8` bytes while shifting in zeros. +/// Shifts 128-bit lanes in `a` right by `imm8` bytes while shifting in zeros. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_srli_si256) #[inline] @@ -3141,7 +3141,7 @@ pub unsafe fn _mm256_srli_si256(a: __m256i, imm8: i32) -> __m256i { mem::transmute(constify_imm8!(imm8 * 8, call)) } -/// Shift 128-bit lanes in `a` right by `imm8` bytes while shifting in zeros. +/// Shifts 128-bit lanes in `a` right by `imm8` bytes while shifting in zeros. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_bsrli_epi128) #[inline] @@ -3159,7 +3159,7 @@ pub unsafe fn _mm256_bsrli_epi128(a: __m256i, imm8: i32) -> __m256i { mem::transmute(constify_imm8!(imm8 * 8, call)) } -/// Shift packed 16-bit integers in `a` right by `count` while shifting in +/// Shifts packed 16-bit integers in `a` right by `count` while shifting in /// zeros. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_srl_epi16) @@ -3171,7 +3171,7 @@ pub unsafe fn _mm256_srl_epi16(a: __m256i, count: __m128i) -> __m256i { mem::transmute(psrlw(a.as_i16x16(), count.as_i16x8())) } -/// Shift packed 32-bit integers in `a` right by `count` while shifting in +/// Shifts packed 32-bit integers in `a` right by `count` while shifting in /// zeros. 
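The difference between the arithmetic (`sra*`, sign bits shifted in) and logical (`srl*`, zeros shifted in) right shifts above is easiest to see on a negative lane; a sketch using the per-element variable-shift forms (illustrative values, not from the patch):

```rust
use std::arch::x86_64::*;

fn main() {
    if is_x86_feature_detected!("avx2") {
        unsafe {
            let a = _mm256_set1_epi32(-16);
            let count = _mm256_set1_epi32(2);
            // Arithmetic shift keeps the sign: -16 >> 2 == -4.
            let sra = _mm256_srav_epi32(a, count);
            // Logical shift fills with zeros: 0xFFFF_FFF0 >> 2 == 0x3FFF_FFFC.
            let srl = _mm256_srlv_epi32(a, count);
            let (mut sra_out, mut srl_out) = ([0i32; 8], [0i32; 8]);
            _mm256_storeu_si256(sra_out.as_mut_ptr() as *mut __m256i, sra);
            _mm256_storeu_si256(srl_out.as_mut_ptr() as *mut __m256i, srl);
            assert_eq!(sra_out, [-4; 8]);
            assert_eq!(srl_out, [0x3FFF_FFFC; 8]);
        }
    }
}
```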
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_srl_epi32) @@ -3183,7 +3183,7 @@ pub unsafe fn _mm256_srl_epi32(a: __m256i, count: __m128i) -> __m256i { mem::transmute(psrld(a.as_i32x8(), count.as_i32x4())) } -/// Shift packed 64-bit integers in `a` right by `count` while shifting in +/// Shifts packed 64-bit integers in `a` right by `count` while shifting in /// zeros. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_srl_epi64) @@ -3195,7 +3195,7 @@ pub unsafe fn _mm256_srl_epi64(a: __m256i, count: __m128i) -> __m256i { mem::transmute(psrlq(a.as_i64x4(), count.as_i64x2())) } -/// Shift packed 16-bit integers in `a` right by `imm8` while shifting in +/// Shifts packed 16-bit integers in `a` right by `imm8` while shifting in /// zeros /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_srli_epi16) @@ -3207,7 +3207,7 @@ pub unsafe fn _mm256_srli_epi16(a: __m256i, imm8: i32) -> __m256i { mem::transmute(psrliw(a.as_i16x16(), imm8)) } -/// Shift packed 32-bit integers in `a` right by `imm8` while shifting in +/// Shifts packed 32-bit integers in `a` right by `imm8` while shifting in /// zeros /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_srli_epi32) @@ -3219,7 +3219,7 @@ pub unsafe fn _mm256_srli_epi32(a: __m256i, imm8: i32) -> __m256i { mem::transmute(psrlid(a.as_i32x8(), imm8)) } -/// Shift packed 64-bit integers in `a` right by `imm8` while shifting in +/// Shifts packed 64-bit integers in `a` right by `imm8` while shifting in /// zeros /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_srli_epi64) @@ -3231,7 +3231,7 @@ pub unsafe fn _mm256_srli_epi64(a: __m256i, imm8: i32) -> __m256i { mem::transmute(psrliq(a.as_i64x4(), imm8)) } -/// Shift packed 32-bit integers in `a` right by the amount specified by +/// Shifts packed 32-bit integers in `a` right by the amount specified by /// the corresponding element in `count` while shifting in zeros, /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srlv_epi32) @@ -3243,7 +3243,7 @@ pub unsafe fn _mm_srlv_epi32(a: __m128i, count: __m128i) -> __m128i { mem::transmute(psrlvd(a.as_i32x4(), count.as_i32x4())) } -/// Shift packed 32-bit integers in `a` right by the amount specified by +/// Shifts packed 32-bit integers in `a` right by the amount specified by /// the corresponding element in `count` while shifting in zeros, /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_srlv_epi32) @@ -3255,7 +3255,7 @@ pub unsafe fn _mm256_srlv_epi32(a: __m256i, count: __m256i) -> __m256i { mem::transmute(psrlvd256(a.as_i32x8(), count.as_i32x8())) } -/// Shift packed 64-bit integers in `a` right by the amount specified by +/// Shifts packed 64-bit integers in `a` right by the amount specified by /// the corresponding element in `count` while shifting in zeros, /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srlv_epi64) @@ -3267,7 +3267,7 @@ pub unsafe fn _mm_srlv_epi64(a: __m128i, count: __m128i) -> __m128i { mem::transmute(psrlvq(a.as_i64x2(), count.as_i64x2())) } -/// Shift packed 64-bit integers in `a` right by the amount specified by +/// Shifts packed 64-bit integers in `a` right by the amount specified by /// the 
corresponding element in `count` while shifting in zeros, /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_srlv_epi64) @@ -3373,7 +3373,7 @@ pub unsafe fn _mm256_subs_epu8(a: __m256i, b: __m256i) -> __m256i { mem::transmute(psubusb(a.as_u8x32(), b.as_u8x32())) } -/// Unpack and interleave 8-bit integers from the high half of each +/// Unpacks and interleave 8-bit integers from the high half of each /// 128-bit lane in `a` and `b`. /// /// ```rust @@ -3427,7 +3427,7 @@ pub unsafe fn _mm256_unpackhi_epi8(a: __m256i, b: __m256i) -> __m256i { mem::transmute(r) } -/// Unpack and interleave 8-bit integers from the low half of each +/// Unpacks and interleave 8-bit integers from the low half of each /// 128-bit lane of `a` and `b`. /// /// ```rust @@ -3480,7 +3480,7 @@ pub unsafe fn _mm256_unpacklo_epi8(a: __m256i, b: __m256i) -> __m256i { mem::transmute(r) } -/// Unpack and interleave 16-bit integers from the high half of each +/// Unpacks and interleave 16-bit integers from the high half of each /// 128-bit lane of `a` and `b`. /// /// ```rust @@ -3527,7 +3527,7 @@ pub unsafe fn _mm256_unpackhi_epi16(a: __m256i, b: __m256i) -> __m256i { mem::transmute(r) } -/// Unpack and interleave 16-bit integers from the low half of each +/// Unpacks and interleave 16-bit integers from the low half of each /// 128-bit lane of `a` and `b`. /// /// ```rust @@ -3575,7 +3575,7 @@ pub unsafe fn _mm256_unpacklo_epi16(a: __m256i, b: __m256i) -> __m256i { mem::transmute(r) } -/// Unpack and interleave 32-bit integers from the high half of each +/// Unpacks and interleave 32-bit integers from the high half of each /// 128-bit lane of `a` and `b`. /// /// ```rust @@ -3612,7 +3612,7 @@ pub unsafe fn _mm256_unpackhi_epi32(a: __m256i, b: __m256i) -> __m256i { mem::transmute(r) } -/// Unpack and interleave 32-bit integers from the low half of each +/// Unpacks and interleave 32-bit integers from the low half of each /// 128-bit lane of `a` and `b`. /// /// ```rust @@ -3649,7 +3649,7 @@ pub unsafe fn _mm256_unpacklo_epi32(a: __m256i, b: __m256i) -> __m256i { mem::transmute(r) } -/// Unpack and interleave 64-bit integers from the high half of each +/// Unpacks and interleave 64-bit integers from the high half of each /// 128-bit lane of `a` and `b`. /// /// ```rust @@ -3686,7 +3686,7 @@ pub unsafe fn _mm256_unpackhi_epi64(a: __m256i, b: __m256i) -> __m256i { mem::transmute(r) } -/// Unpack and interleave 64-bit integers from the low half of each +/// Unpacks and interleave 64-bit integers from the low half of each /// 128-bit lane of `a` and `b`. /// /// ```rust @@ -3723,7 +3723,7 @@ pub unsafe fn _mm256_unpacklo_epi64(a: __m256i, b: __m256i) -> __m256i { mem::transmute(r) } -/// Compute the bitwise XOR of 256 bits (representing integer data) +/// Computes the bitwise XOR of 256 bits (representing integer data) /// in `a` and `b` /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_xor_si256) @@ -3735,7 +3735,7 @@ pub unsafe fn _mm256_xor_si256(a: __m256i, b: __m256i) -> __m256i { mem::transmute(simd_xor(a.as_i64x4(), b.as_i64x4())) } -/// Extract an 8-bit integer from `a`, selected with `imm8`. Returns a 32-bit +/// Extracts an 8-bit integer from `a`, selected with `imm8`. Returns a 32-bit /// integer containing the zero-extended integer data. /// /// See [LLVM commit D20468][https://reviews.llvm.org/D20468]. 
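The unpack intrinsics above interleave within each 128-bit lane rather than across the full 256-bit vector; a sketch with `_mm256_unpacklo_epi32` (illustrative values, not part of the patch):

```rust
use std::arch::x86_64::*;

fn main() {
    if is_x86_feature_detected!("avx2") {
        unsafe {
            let a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
            let b = _mm256_setr_epi32(10, 11, 12, 13, 14, 15, 16, 17);
            // Low halves of each 128-bit lane are interleaved:
            // lane 0 -> a0, b0, a1, b1   lane 1 -> a4, b4, a5, b5
            let r = _mm256_unpacklo_epi32(a, b);
            let mut out = [0i32; 8];
            _mm256_storeu_si256(out.as_mut_ptr() as *mut __m256i, r);
            assert_eq!(out, [0, 10, 1, 11, 4, 14, 5, 15]);
        }
    }
}
```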
@@ -3751,7 +3751,7 @@ pub unsafe fn _mm256_extract_epi8(a: __m256i, imm8: i32) -> i8 { simd_extract(a.as_i8x32(), imm8) } -/// Extract a 16-bit integer from `a`, selected with `imm8`. Returns a 32-bit +/// Extracts a 16-bit integer from `a`, selected with `imm8`. Returns a 32-bit /// integer containing the zero-extended integer data. /// /// See [LLVM commit D20468][https://reviews.llvm.org/D20468]. @@ -3767,7 +3767,7 @@ pub unsafe fn _mm256_extract_epi16(a: __m256i, imm8: i32) -> i16 { simd_extract(a.as_i16x16(), imm8) } -/// Extract a 32-bit integer from `a`, selected with `imm8`. +/// Extracts a 32-bit integer from `a`, selected with `imm8`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_extract_epi32) #[inline] diff --git a/library/stdarch/crates/core_arch/src/x86/avx512f.rs b/library/stdarch/crates/core_arch/src/x86/avx512f.rs index 166ca179a52..4e04f0b5079 100644 --- a/library/stdarch/crates/core_arch/src/x86/avx512f.rs +++ b/library/stdarch/crates/core_arch/src/x86/avx512f.rs @@ -21,7 +21,7 @@ pub unsafe fn _mm512_abs_epi32(a: __m512i) -> __m512i { mem::transmute(simd_select(cmp, a, sub)) } -/// Compute the absolute value of packed 32-bit integers in `a`, and store the +/// Computes the absolute value of packed 32-bit integers in `a`, and store the /// unsigned results in `dst` using writemask `k` (elements are copied from /// `src` when the corresponding mask bit is not set). /// @@ -34,7 +34,7 @@ pub unsafe fn _mm512_mask_abs_epi32(src: __m512i, k: __mmask16, a: __m512i) -> _ mem::transmute(simd_select_bitmask(k, abs, src.as_i32x16())) } -/// Compute the absolute value of packed 32-bit integers in `a`, and store the +/// Computes the absolute value of packed 32-bit integers in `a`, and store the /// unsigned results in `dst` using zeromask `k` (elements are zeroed out when /// the corresponding mask bit is not set). /// @@ -48,7 +48,7 @@ pub unsafe fn _mm512_maskz_abs_epi32(k: __mmask16, a: __m512i) -> __m512i { mem::transmute(simd_select_bitmask(k, abs, zero)) } -/// Return vector of type `__m512i` with all elements set to zero. +/// Returns vector of type `__m512i` with all elements set to zero. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#avx512techs=AVX512F&expand=33,34,4990&text=_mm512_setzero_si512) #[inline] @@ -59,7 +59,7 @@ pub unsafe fn _mm512_setzero_si512() -> __m512i { mem::zeroed() } -/// Set packed 32-bit integers in `dst` with the supplied values in reverse +/// Sets packed 32-bit integers in `dst` with the supplied values in reverse /// order. #[inline] #[target_feature(enable = "avx512f")] diff --git a/library/stdarch/crates/core_arch/src/x86/bmi1.rs b/library/stdarch/crates/core_arch/src/x86/bmi1.rs index ece4d76dc3d..47a62d1f64b 100644 --- a/library/stdarch/crates/core_arch/src/x86/bmi1.rs +++ b/library/stdarch/crates/core_arch/src/x86/bmi1.rs @@ -50,7 +50,7 @@ pub unsafe fn _andn_u32(a: u32, b: u32) -> u32 { !a & b } -/// Extract lowest set isolated bit. +/// Extracts lowest set isolated bit. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_blsi_u32) #[inline] @@ -61,7 +61,7 @@ pub unsafe fn _blsi_u32(x: u32) -> u32 { x & x.wrapping_neg() } -/// Get mask up to lowest set bit. +/// Gets mask up to lowest set bit. 
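For the BMI1 helpers above, a scalar sketch of `_blsi_u32` (isolate the lowest set bit) and `_blsmsk_u32` (mask up to and including the lowest set bit); the input value and the runtime `bmi1` check are illustrative:

```rust
use std::arch::x86_64::*;

fn main() {
    if is_x86_feature_detected!("bmi1") {
        unsafe {
            let x: u32 = 0b1011_0000;
            // Lowest set bit isolated: 0b0001_0000.
            assert_eq!(_blsi_u32(x), 0b0001_0000);
            // All bits up to and including the lowest set bit: 0b0001_1111.
            assert_eq!(_blsmsk_u32(x), 0b0001_1111);
        }
    }
}
```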
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_blsmsk_u32) #[inline] diff --git a/library/stdarch/crates/core_arch/src/x86/bmi2.rs b/library/stdarch/crates/core_arch/src/x86/bmi2.rs index 7215e5a41d2..d3b9434a458 100644 --- a/library/stdarch/crates/core_arch/src/x86/bmi2.rs +++ b/library/stdarch/crates/core_arch/src/x86/bmi2.rs @@ -31,7 +31,7 @@ pub unsafe fn _mulx_u32(a: u32, b: u32, hi: &mut u32) -> u32 { result as u32 } -/// Zero higher bits of `a` >= `index`. +/// Zeroes higher bits of `a` >= `index`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_bzhi_u32) #[inline] diff --git a/library/stdarch/crates/core_arch/src/x86/bswap.rs b/library/stdarch/crates/core_arch/src/x86/bswap.rs index ee6d6615b14..7d51082ff49 100644 --- a/library/stdarch/crates/core_arch/src/x86/bswap.rs +++ b/library/stdarch/crates/core_arch/src/x86/bswap.rs @@ -5,7 +5,7 @@ #[cfg(test)] use stdsimd_test::assert_instr; -/// Return an integer with the reversed byte order of x +/// Returns an integer with the reversed byte order of x /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_bswap) #[inline] diff --git a/library/stdarch/crates/core_arch/src/x86/fma.rs b/library/stdarch/crates/core_arch/src/x86/fma.rs index 4915c44f574..f88abf115de 100644 --- a/library/stdarch/crates/core_arch/src/x86/fma.rs +++ b/library/stdarch/crates/core_arch/src/x86/fma.rs @@ -23,7 +23,7 @@ use core_arch::x86::*; #[cfg(test)] use stdsimd_test::assert_instr; -/// Multiply packed double-precision (64-bit) floating-point elements in `a` +/// Multiplies packed double-precision (64-bit) floating-point elements in `a` /// and `b`, and add the intermediate result to packed elements in `c`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmadd_pd) @@ -35,7 +35,7 @@ pub unsafe fn _mm_fmadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { vfmaddpd(a, b, c) } -/// Multiply packed double-precision (64-bit) floating-point elements in `a` +/// Multiplies packed double-precision (64-bit) floating-point elements in `a` /// and `b`, and add the intermediate result to packed elements in `c`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmadd_pd) @@ -47,7 +47,7 @@ pub unsafe fn _mm256_fmadd_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d { vfmaddpd256(a, b, c) } -/// Multiply packed single-precision (32-bit) floating-point elements in `a` +/// Multiplies packed single-precision (32-bit) floating-point elements in `a` /// and `b`, and add the intermediate result to packed elements in `c`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmadd_ps) @@ -59,7 +59,7 @@ pub unsafe fn _mm_fmadd_ps(a: __m128, b: __m128, c: __m128) -> __m128 { vfmaddps(a, b, c) } -/// Multiply packed single-precision (32-bit) floating-point elements in `a` +/// Multiplies packed single-precision (32-bit) floating-point elements in `a` /// and `b`, and add the intermediate result to packed elements in `c`. 
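For the bit-manipulation helpers above, a rough usage sketch with run-time detection; it assumes an x86_64 target, the current `std::arch::x86_64` names, and arbitrary input values:

```rust
#[cfg(target_arch = "x86_64")]
fn main() {
    use std::arch::x86_64::{_blsi_u32, _blsmsk_u32, _bswap, _bzhi_u32};

    let x: u32 = 0b1011_0100;
    if is_x86_feature_detected!("bmi1") {
        unsafe {
            assert_eq!(_blsi_u32(x), 0b0000_0100);   // isolate lowest set bit
            assert_eq!(_blsmsk_u32(x), 0b0000_0111); // mask up to lowest set bit
        }
    }
    if is_x86_feature_detected!("bmi2") {
        unsafe {
            assert_eq!(_bzhi_u32(x, 4), 0b0100);     // bits at index >= 4 zeroed
        }
    }
    // Byte swapping needs no run-time check.
    unsafe {
        assert_eq!(_bswap(0x1122_3344), 0x4433_2211);
    }
}

#[cfg(not(target_arch = "x86_64"))]
fn main() {}
```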
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmadd_ps) @@ -71,9 +71,9 @@ pub unsafe fn _mm256_fmadd_ps(a: __m256, b: __m256, c: __m256) -> __m256 { vfmaddps256(a, b, c) } -/// Multiply the lower double-precision (64-bit) floating-point elements in +/// Multiplies the lower double-precision (64-bit) floating-point elements in /// `a` and `b`, and add the intermediate result to the lower element in `c`. -/// Store the result in the lower element of the returned value, and copy the +/// Stores the result in the lower element of the returned value, and copy the /// upper element from `a` to the upper elements of the result. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmadd_sd) @@ -85,9 +85,9 @@ pub unsafe fn _mm_fmadd_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { vfmaddsd(a, b, c) } -/// Multiply the lower single-precision (32-bit) floating-point elements in +/// Multiplies the lower single-precision (32-bit) floating-point elements in /// `a` and `b`, and add the intermediate result to the lower element in `c`. -/// Store the result in the lower element of the returned value, and copy the +/// Stores the result in the lower element of the returned value, and copy the /// 3 upper elements from `a` to the upper elements of the result. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmadd_ss) @@ -99,7 +99,7 @@ pub unsafe fn _mm_fmadd_ss(a: __m128, b: __m128, c: __m128) -> __m128 { vfmaddss(a, b, c) } -/// Multiply packed double-precision (64-bit) floating-point elements in `a` +/// Multiplies packed double-precision (64-bit) floating-point elements in `a` /// and `b`, and alternatively add and subtract packed elements in `c` to/from /// the intermediate result. /// @@ -112,7 +112,7 @@ pub unsafe fn _mm_fmaddsub_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { vfmaddsubpd(a, b, c) } -/// Multiply packed double-precision (64-bit) floating-point elements in `a` +/// Multiplies packed double-precision (64-bit) floating-point elements in `a` /// and `b`, and alternatively add and subtract packed elements in `c` to/from /// the intermediate result. /// @@ -125,7 +125,7 @@ pub unsafe fn _mm256_fmaddsub_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d vfmaddsubpd256(a, b, c) } -/// Multiply packed single-precision (32-bit) floating-point elements in `a` +/// Multiplies packed single-precision (32-bit) floating-point elements in `a` /// and `b`, and alternatively add and subtract packed elements in `c` to/from /// the intermediate result. /// @@ -138,7 +138,7 @@ pub unsafe fn _mm_fmaddsub_ps(a: __m128, b: __m128, c: __m128) -> __m128 { vfmaddsubps(a, b, c) } -/// Multiply packed single-precision (32-bit) floating-point elements in `a` +/// Multiplies packed single-precision (32-bit) floating-point elements in `a` /// and `b`, and alternatively add and subtract packed elements in `c` to/from /// the intermediate result. /// @@ -151,7 +151,7 @@ pub unsafe fn _mm256_fmaddsub_ps(a: __m256, b: __m256, c: __m256) -> __m256 { vfmaddsubps256(a, b, c) } -/// Multiply packed double-precision (64-bit) floating-point elements in `a` +/// Multiplies packed double-precision (64-bit) floating-point elements in `a` /// and `b`, and subtract packed elements in `c` from the intermediate result. 
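A fused multiply-add computes `a * b + c` with a single rounding step, which is what distinguishes it from a separate multiply followed by an add. A hedged sketch of `_mm_fmadd_ps` behind run-time detection, with a scalar `mul_add` fallback; the load/store helpers and the chosen values are illustrative only:

```rust
#[cfg(target_arch = "x86_64")]
fn main() {
    let (a, b, c) = ([1.0f32, 2.0, 3.0, 4.0], [10.0f32; 4], [0.5f32; 4]);
    let mut out = [0.0f32; 4];

    if is_x86_feature_detected!("fma") {
        unsafe {
            use std::arch::x86_64::*;
            let va = _mm_loadu_ps(a.as_ptr());
            let vb = _mm_loadu_ps(b.as_ptr());
            let vc = _mm_loadu_ps(c.as_ptr());
            // out[i] = a[i] * b[i] + c[i], with a single rounding step.
            _mm_storeu_ps(out.as_mut_ptr(), _mm_fmadd_ps(va, vb, vc));
        }
    } else {
        // Scalar fallback with the same single-rounding behaviour.
        for i in 0..4 {
            out[i] = a[i].mul_add(b[i], c[i]);
        }
    }
    assert_eq!(out, [10.5, 20.5, 30.5, 40.5]);
}

#[cfg(not(target_arch = "x86_64"))]
fn main() {}
```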
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmsub_pd) @@ -163,7 +163,7 @@ pub unsafe fn _mm_fmsub_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { vfmsubpd(a, b, c) } -/// Multiply packed double-precision (64-bit) floating-point elements in `a` +/// Multiplies packed double-precision (64-bit) floating-point elements in `a` /// and `b`, and subtract packed elements in `c` from the intermediate result. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmsub_pd) @@ -175,7 +175,7 @@ pub unsafe fn _mm256_fmsub_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d { vfmsubpd256(a, b, c) } -/// Multiply packed single-precision (32-bit) floating-point elements in `a` +/// Multiplies packed single-precision (32-bit) floating-point elements in `a` /// and `b`, and subtract packed elements in `c` from the intermediate result. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmsub_ps) @@ -187,7 +187,7 @@ pub unsafe fn _mm_fmsub_ps(a: __m128, b: __m128, c: __m128) -> __m128 { vfmsubps(a, b, c) } -/// Multiply packed single-precision (32-bit) floating-point elements in `a` +/// Multiplies packed single-precision (32-bit) floating-point elements in `a` /// and `b`, and subtract packed elements in `c` from the intermediate result. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmsub_ps) @@ -199,7 +199,7 @@ pub unsafe fn _mm256_fmsub_ps(a: __m256, b: __m256, c: __m256) -> __m256 { vfmsubps256(a, b, c) } -/// Multiply the lower double-precision (64-bit) floating-point elements in +/// Multiplies the lower double-precision (64-bit) floating-point elements in /// `a` and `b`, and subtract the lower element in `c` from the intermediate /// result. Store the result in the lower element of the returned value, and /// copy the upper element from `a` to the upper elements of the result. @@ -213,7 +213,7 @@ pub unsafe fn _mm_fmsub_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { vfmsubsd(a, b, c) } -/// Multiply the lower single-precision (32-bit) floating-point elements in +/// Multiplies the lower single-precision (32-bit) floating-point elements in /// `a` and `b`, and subtract the lower element in `c` from the intermediate /// result. Store the result in the lower element of the returned value, and /// copy the 3 upper elements from `a` to the upper elements of the result. @@ -227,7 +227,7 @@ pub unsafe fn _mm_fmsub_ss(a: __m128, b: __m128, c: __m128) -> __m128 { vfmsubss(a, b, c) } -/// Multiply packed double-precision (64-bit) floating-point elements in `a` +/// Multiplies packed double-precision (64-bit) floating-point elements in `a` /// and `b`, and alternatively subtract and add packed elements in `c` from/to /// the intermediate result. /// @@ -240,7 +240,7 @@ pub unsafe fn _mm_fmsubadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { vfmsubaddpd(a, b, c) } -/// Multiply packed double-precision (64-bit) floating-point elements in `a` +/// Multiplies packed double-precision (64-bit) floating-point elements in `a` /// and `b`, and alternatively subtract and add packed elements in `c` from/to /// the intermediate result. 
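"Alternatively add and subtract" follows a fixed lane pattern in Intel's pseudocode: even-indexed lanes subtract `c`, odd-indexed lanes add it, and the `fmsubadd` variants swap the two. A scalar sketch of that pattern, under that reading of the pseudocode:

```rust
fn fmaddsub4(a: [f32; 4], b: [f32; 4], c: [f32; 4]) -> [f32; 4] {
    let mut dst = [0.0f32; 4];
    for i in 0..4 {
        dst[i] = if i % 2 == 0 {
            a[i].mul_add(b[i], -c[i]) // even lane: a*b - c
        } else {
            a[i].mul_add(b[i], c[i])  // odd lane:  a*b + c
        };
    }
    dst
}

fn main() {
    let r = fmaddsub4([1.0; 4], [2.0; 4], [0.5; 4]);
    assert_eq!(r, [1.5, 2.5, 1.5, 2.5]);
}
```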
/// @@ -253,7 +253,7 @@ pub unsafe fn _mm256_fmsubadd_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d vfmsubaddpd256(a, b, c) } -/// Multiply packed single-precision (32-bit) floating-point elements in `a` +/// Multiplies packed single-precision (32-bit) floating-point elements in `a` /// and `b`, and alternatively subtract and add packed elements in `c` from/to /// the intermediate result. /// @@ -266,7 +266,7 @@ pub unsafe fn _mm_fmsubadd_ps(a: __m128, b: __m128, c: __m128) -> __m128 { vfmsubaddps(a, b, c) } -/// Multiply packed single-precision (32-bit) floating-point elements in `a` +/// Multiplies packed single-precision (32-bit) floating-point elements in `a` /// and `b`, and alternatively subtract and add packed elements in `c` from/to /// the intermediate result. /// @@ -279,7 +279,7 @@ pub unsafe fn _mm256_fmsubadd_ps(a: __m256, b: __m256, c: __m256) -> __m256 { vfmsubaddps256(a, b, c) } -/// Multiply packed double-precision (64-bit) floating-point elements in `a` +/// Multiplies packed double-precision (64-bit) floating-point elements in `a` /// and `b`, and add the negated intermediate result to packed elements in `c`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmadd_pd) @@ -291,7 +291,7 @@ pub unsafe fn _mm_fnmadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { vfnmaddpd(a, b, c) } -/// Multiply packed double-precision (64-bit) floating-point elements in `a` +/// Multiplies packed double-precision (64-bit) floating-point elements in `a` /// and `b`, and add the negated intermediate result to packed elements in `c`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fnmadd_pd) @@ -303,7 +303,7 @@ pub unsafe fn _mm256_fnmadd_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d { vfnmaddpd256(a, b, c) } -/// Multiply packed single-precision (32-bit) floating-point elements in `a` +/// Multiplies packed single-precision (32-bit) floating-point elements in `a` /// and `b`, and add the negated intermediate result to packed elements in `c`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmadd_ps) @@ -315,7 +315,7 @@ pub unsafe fn _mm_fnmadd_ps(a: __m128, b: __m128, c: __m128) -> __m128 { vfnmaddps(a, b, c) } -/// Multiply packed single-precision (32-bit) floating-point elements in `a` +/// Multiplies packed single-precision (32-bit) floating-point elements in `a` /// and `b`, and add the negated intermediate result to packed elements in `c`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fnmadd_ps) @@ -327,7 +327,7 @@ pub unsafe fn _mm256_fnmadd_ps(a: __m256, b: __m256, c: __m256) -> __m256 { vfnmaddps256(a, b, c) } -/// Multiply the lower double-precision (64-bit) floating-point elements in +/// Multiplies the lower double-precision (64-bit) floating-point elements in /// `a` and `b`, and add the negated intermediate result to the lower element /// in `c`. Store the result in the lower element of the returned value, and /// copy the upper element from `a` to the upper elements of the result. @@ -341,7 +341,7 @@ pub unsafe fn _mm_fnmadd_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { vfnmaddsd(a, b, c) } -/// Multiply the lower single-precision (32-bit) floating-point elements in +/// Multiplies the lower single-precision (32-bit) floating-point elements in /// `a` and `b`, and add the negated intermediate result to the lower element /// in `c`. 
Store the result in the lower element of the returned value, and /// copy the 3 upper elements from `a` to the upper elements of the result. @@ -355,7 +355,7 @@ pub unsafe fn _mm_fnmadd_ss(a: __m128, b: __m128, c: __m128) -> __m128 { vfnmaddss(a, b, c) } -/// Multiply packed double-precision (64-bit) floating-point elements in `a` +/// Multiplies packed double-precision (64-bit) floating-point elements in `a` /// and `b`, and subtract packed elements in `c` from the negated intermediate /// result. /// @@ -368,7 +368,7 @@ pub unsafe fn _mm_fnmsub_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { vfnmsubpd(a, b, c) } -/// Multiply packed double-precision (64-bit) floating-point elements in `a` +/// Multiplies packed double-precision (64-bit) floating-point elements in `a` /// and `b`, and subtract packed elements in `c` from the negated intermediate /// result. /// @@ -381,7 +381,7 @@ pub unsafe fn _mm256_fnmsub_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d { vfnmsubpd256(a, b, c) } -/// Multiply packed single-precision (32-bit) floating-point elements in `a` +/// Multiplies packed single-precision (32-bit) floating-point elements in `a` /// and `b`, and subtract packed elements in `c` from the negated intermediate /// result. /// @@ -394,7 +394,7 @@ pub unsafe fn _mm_fnmsub_ps(a: __m128, b: __m128, c: __m128) -> __m128 { vfnmsubps(a, b, c) } -/// Multiply packed single-precision (32-bit) floating-point elements in `a` +/// Multiplies packed single-precision (32-bit) floating-point elements in `a` /// and `b`, and subtract packed elements in `c` from the negated intermediate /// result. /// @@ -407,7 +407,7 @@ pub unsafe fn _mm256_fnmsub_ps(a: __m256, b: __m256, c: __m256) -> __m256 { vfnmsubps256(a, b, c) } -/// Multiply the lower double-precision (64-bit) floating-point elements in +/// Multiplies the lower double-precision (64-bit) floating-point elements in /// `a` and `b`, and subtract packed elements in `c` from the negated /// intermediate result. Store the result in the lower element of the returned /// value, and copy the upper element from `a` to the upper elements of the @@ -422,7 +422,7 @@ pub unsafe fn _mm_fnmsub_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { vfnmsubsd(a, b, c) } -/// Multiply the lower single-precision (32-bit) floating-point elements in +/// Multiplies the lower single-precision (32-bit) floating-point elements in /// `a` and `b`, and subtract packed elements in `c` from the negated /// intermediate result. Store the result in the lower element of the /// returned value, and copy the 3 upper elements from `a` to the upper diff --git a/library/stdarch/crates/core_arch/src/x86/mmx.rs b/library/stdarch/crates/core_arch/src/x86/mmx.rs index 82f085cf95f..6308fcd36d7 100644 --- a/library/stdarch/crates/core_arch/src/x86/mmx.rs +++ b/library/stdarch/crates/core_arch/src/x86/mmx.rs @@ -25,7 +25,7 @@ pub unsafe fn _mm_setzero_si64() -> __m64 { mem::transmute(0_i64) } -/// Add packed 8-bit integers in `a` and `b`. +/// Adds packed 8-bit integers in `a` and `b`. #[inline] #[target_feature(enable = "mmx")] #[cfg_attr(test, assert_instr(paddb))] @@ -33,7 +33,7 @@ pub unsafe fn _mm_add_pi8(a: __m64, b: __m64) -> __m64 { paddb(a, b) } -/// Add packed 8-bit integers in `a` and `b`. +/// Adds packed 8-bit integers in `a` and `b`. #[inline] #[target_feature(enable = "mmx")] #[cfg_attr(test, assert_instr(paddb))] @@ -41,7 +41,7 @@ pub unsafe fn _m_paddb(a: __m64, b: __m64) -> __m64 { _mm_add_pi8(a, b) } -/// Add packed 16-bit integers in `a` and `b`. 
+/// Adds packed 16-bit integers in `a` and `b`. #[inline] #[target_feature(enable = "mmx")] #[cfg_attr(test, assert_instr(paddw))] @@ -49,7 +49,7 @@ pub unsafe fn _mm_add_pi16(a: __m64, b: __m64) -> __m64 { paddw(a, b) } -/// Add packed 16-bit integers in `a` and `b`. +/// Adds packed 16-bit integers in `a` and `b`. #[inline] #[target_feature(enable = "mmx")] #[cfg_attr(test, assert_instr(paddw))] @@ -57,7 +57,7 @@ pub unsafe fn _m_paddw(a: __m64, b: __m64) -> __m64 { _mm_add_pi16(a, b) } -/// Add packed 32-bit integers in `a` and `b`. +/// Adds packed 32-bit integers in `a` and `b`. #[inline] #[target_feature(enable = "mmx")] #[cfg_attr(test, assert_instr(paddd))] @@ -65,7 +65,7 @@ pub unsafe fn _mm_add_pi32(a: __m64, b: __m64) -> __m64 { paddd(a, b) } -/// Add packed 32-bit integers in `a` and `b`. +/// Adds packed 32-bit integers in `a` and `b`. #[inline] #[target_feature(enable = "mmx")] #[cfg_attr(test, assert_instr(paddd))] @@ -73,7 +73,7 @@ pub unsafe fn _m_paddd(a: __m64, b: __m64) -> __m64 { _mm_add_pi32(a, b) } -/// Add packed 8-bit integers in `a` and `b` using saturation. +/// Adds packed 8-bit integers in `a` and `b` using saturation. #[inline] #[target_feature(enable = "mmx")] #[cfg_attr(test, assert_instr(paddsb))] @@ -81,7 +81,7 @@ pub unsafe fn _mm_adds_pi8(a: __m64, b: __m64) -> __m64 { paddsb(a, b) } -/// Add packed 8-bit integers in `a` and `b` using saturation. +/// Adds packed 8-bit integers in `a` and `b` using saturation. #[inline] #[target_feature(enable = "mmx")] #[cfg_attr(test, assert_instr(paddsb))] @@ -89,7 +89,7 @@ pub unsafe fn _m_paddsb(a: __m64, b: __m64) -> __m64 { _mm_adds_pi8(a, b) } -/// Add packed 16-bit integers in `a` and `b` using saturation. +/// Adds packed 16-bit integers in `a` and `b` using saturation. #[inline] #[target_feature(enable = "mmx")] #[cfg_attr(test, assert_instr(paddsw))] @@ -97,7 +97,7 @@ pub unsafe fn _mm_adds_pi16(a: __m64, b: __m64) -> __m64 { paddsw(a, b) } -/// Add packed 16-bit integers in `a` and `b` using saturation. +/// Adds packed 16-bit integers in `a` and `b` using saturation. #[inline] #[target_feature(enable = "mmx")] #[cfg_attr(test, assert_instr(paddsw))] @@ -105,7 +105,7 @@ pub unsafe fn _m_paddsw(a: __m64, b: __m64) -> __m64 { _mm_adds_pi16(a, b) } -/// Add packed unsigned 8-bit integers in `a` and `b` using saturation. +/// Adds packed unsigned 8-bit integers in `a` and `b` using saturation. #[inline] #[target_feature(enable = "mmx")] #[cfg_attr(test, assert_instr(paddusb))] @@ -113,7 +113,7 @@ pub unsafe fn _mm_adds_pu8(a: __m64, b: __m64) -> __m64 { paddusb(a, b) } -/// Add packed unsigned 8-bit integers in `a` and `b` using saturation. +/// Adds packed unsigned 8-bit integers in `a` and `b` using saturation. #[inline] #[target_feature(enable = "mmx")] #[cfg_attr(test, assert_instr(paddusb))] @@ -121,7 +121,7 @@ pub unsafe fn _m_paddusb(a: __m64, b: __m64) -> __m64 { _mm_adds_pu8(a, b) } -/// Add packed unsigned 16-bit integers in `a` and `b` using saturation. +/// Adds packed unsigned 16-bit integers in `a` and `b` using saturation. #[inline] #[target_feature(enable = "mmx")] #[cfg_attr(test, assert_instr(paddusw))] @@ -129,7 +129,7 @@ pub unsafe fn _mm_adds_pu16(a: __m64, b: __m64) -> __m64 { paddusw(a, b) } -/// Add packed unsigned 16-bit integers in `a` and `b` using saturation. +/// Adds packed unsigned 16-bit integers in `a` and `b` using saturation. 
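"Using saturation" means results are clamped to the element type's range instead of wrapping. Rust's integer types express the same idea directly, which makes for a compact scalar sketch (the MMX `__m64` intrinsics themselves are a poor target for new example code, so this stays scalar):

```rust
fn main() {
    // Signed 8-bit saturation, as in _mm_adds_pi8 / _m_paddsb.
    assert_eq!(100i8.saturating_add(100), 127);
    assert_eq!((-100i8).saturating_add(-100), -128);

    // Unsigned 8-bit saturation, as in _mm_adds_pu8 / _m_paddusb.
    assert_eq!(200u8.saturating_add(100), 255);

    // The plain packed adds (_mm_add_pi8 and friends) wrap instead.
    assert_eq!(200u8.wrapping_add(100), 44);
}
```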
#[inline] #[target_feature(enable = "mmx")] #[cfg_attr(test, assert_instr(paddusw))] @@ -257,7 +257,7 @@ pub unsafe fn _m_psubusw(a: __m64, b: __m64) -> __m64 { _mm_subs_pu16(a, b) } -/// Convert packed 16-bit integers from `a` and `b` to packed 8-bit integers +/// Converts packed 16-bit integers from `a` and `b` to packed 8-bit integers /// using signed saturation. /// /// Positive values greater than 0x7F are saturated to 0x7F. Negative values @@ -269,7 +269,7 @@ pub unsafe fn _mm_packs_pi16(a: __m64, b: __m64) -> __m64 { packsswb(a, b) } -/// Convert packed 32-bit integers from `a` and `b` to packed 16-bit integers +/// Converts packed 32-bit integers from `a` and `b` to packed 16-bit integers /// using signed saturation. /// /// Positive values greater than 0x7F are saturated to 0x7F. Negative values @@ -362,49 +362,49 @@ pub unsafe fn _mm_unpacklo_pi32(a: __m64, b: __m64) -> __m64 { punpckldq(a, b) } -/// Set packed 16-bit integers in dst with the supplied values. +/// Sets packed 16-bit integers in dst with the supplied values. #[inline] #[target_feature(enable = "mmx")] pub unsafe fn _mm_set_pi16(e3: i16, e2: i16, e1: i16, e0: i16) -> __m64 { _mm_setr_pi16(e0, e1, e2, e3) } -/// Set packed 32-bit integers in dst with the supplied values. +/// Sets packed 32-bit integers in dst with the supplied values. #[inline] #[target_feature(enable = "mmx")] pub unsafe fn _mm_set_pi32(e1: i32, e0: i32) -> __m64 { _mm_setr_pi32(e0, e1) } -/// Set packed 8-bit integers in dst with the supplied values. +/// Sets packed 8-bit integers in dst with the supplied values. #[inline] #[target_feature(enable = "mmx")] pub unsafe fn _mm_set_pi8(e7: i8, e6: i8, e5: i8, e4: i8, e3: i8, e2: i8, e1: i8, e0: i8) -> __m64 { _mm_setr_pi8(e0, e1, e2, e3, e4, e5, e6, e7) } -/// Broadcast 16-bit integer a to all all elements of dst. +/// Broadcasts 16-bit integer a to all all elements of dst. #[inline] #[target_feature(enable = "mmx")] pub unsafe fn _mm_set1_pi16(a: i16) -> __m64 { _mm_setr_pi16(a, a, a, a) } -/// Broadcast 32-bit integer a to all all elements of dst. +/// Broadcasts 32-bit integer a to all all elements of dst. #[inline] #[target_feature(enable = "mmx")] pub unsafe fn _mm_set1_pi32(a: i32) -> __m64 { _mm_setr_pi32(a, a) } -/// Broadcast 8-bit integer a to all all elements of dst. +/// Broadcasts 8-bit integer a to all all elements of dst. #[inline] #[target_feature(enable = "mmx")] pub unsafe fn _mm_set1_pi8(a: i8) -> __m64 { _mm_setr_pi8(a, a, a, a, a, a, a, a) } -/// Set packed 16-bit integers in dst with the supplied values in reverse +/// Sets packed 16-bit integers in dst with the supplied values in reverse /// order. #[inline] #[target_feature(enable = "mmx")] @@ -412,7 +412,7 @@ pub unsafe fn _mm_setr_pi16(e0: i16, e1: i16, e2: i16, e3: i16) -> __m64 { mem::transmute(i16x4::new(e0, e1, e2, e3)) } -/// Set packed 32-bit integers in dst with the supplied values in reverse +/// Sets packed 32-bit integers in dst with the supplied values in reverse /// order. #[inline] #[target_feature(enable = "mmx")] @@ -420,7 +420,7 @@ pub unsafe fn _mm_setr_pi32(e0: i32, e1: i32) -> __m64 { mem::transmute(i32x2::new(e0, e1)) } -/// Set packed 8-bit integers in dst with the supplied values in reverse order. +/// Sets packed 8-bit integers in dst with the supplied values in reverse order. 
#[inline] #[target_feature(enable = "mmx")] pub unsafe fn _mm_setr_pi8( @@ -456,7 +456,7 @@ pub unsafe fn _m_empty() { emms() } -/// Copy 32-bit integer `a` to the lower elements of the return value, and zero +/// Copies 32-bit integer `a` to the lower elements of the return value, and zero /// the upper element of the return value. #[inline] #[target_feature(enable = "mmx")] diff --git a/library/stdarch/crates/core_arch/src/x86/pclmulqdq.rs b/library/stdarch/crates/core_arch/src/x86/pclmulqdq.rs index c047b3b1cd6..26131e4e89e 100644 --- a/library/stdarch/crates/core_arch/src/x86/pclmulqdq.rs +++ b/library/stdarch/crates/core_arch/src/x86/pclmulqdq.rs @@ -16,7 +16,7 @@ extern "C" { fn pclmulqdq(a: __m128i, round_key: __m128i, imm8: u8) -> __m128i; } -/// Perform a carry-less multiplication of two 64-bit polynomials over the +/// Performs a carry-less multiplication of two 64-bit polynomials over the /// finite field GF(2^k). /// /// The immediate byte is used for determining which halves of `a` and `b` diff --git a/library/stdarch/crates/core_arch/src/x86/rdrand.rs b/library/stdarch/crates/core_arch/src/x86/rdrand.rs index 90bb9454b0a..49abc3d0497 100644 --- a/library/stdarch/crates/core_arch/src/x86/rdrand.rs +++ b/library/stdarch/crates/core_arch/src/x86/rdrand.rs @@ -20,7 +20,7 @@ extern "unadjusted" { use stdsimd_test::assert_instr; /// Read a hardware generated 16-bit random value and store the result in val. -/// Return 1 if a random value was generated, and 0 otherwise. +/// Returns 1 if a random value was generated, and 0 otherwise. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_rdrand16_step) #[inline] @@ -34,7 +34,7 @@ pub unsafe fn _rdrand16_step(val: &mut u16) -> i32 { } /// Read a hardware generated 32-bit random value and store the result in val. -/// Return 1 if a random value was generated, and 0 otherwise. +/// Returns 1 if a random value was generated, and 0 otherwise. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_rdrand32_step) #[inline] diff --git a/library/stdarch/crates/core_arch/src/x86/sha.rs b/library/stdarch/crates/core_arch/src/x86/sha.rs index 98bf4707f8c..3ff4e89d23e 100644 --- a/library/stdarch/crates/core_arch/src/x86/sha.rs +++ b/library/stdarch/crates/core_arch/src/x86/sha.rs @@ -23,7 +23,7 @@ extern "C" { #[cfg(test)] use stdsimd_test::assert_instr; -/// Perform an intermediate calculation for the next four SHA1 message values +/// Performs an intermediate calculation for the next four SHA1 message values /// (unsigned 32-bit integers) using previous message values from `a` and `b`, /// and returning the result. /// @@ -36,7 +36,7 @@ pub unsafe fn _mm_sha1msg1_epu32(a: __m128i, b: __m128i) -> __m128i { mem::transmute(sha1msg1(a.as_i32x4(), b.as_i32x4())) } -/// Perform the final calculation for the next four SHA1 message values +/// Performs the final calculation for the next four SHA1 message values /// (unsigned 32-bit integers) using the intermediate result in `a` and the /// previous message values in `b`, and returns the result. 
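Because the RDRAND instruction can transiently fail, the status flag returned by the `_rdrand16_step`/`_rdrand32_step` helpers a little further up has to be checked and the call retried. A sketch of that pattern, assuming an x86_64 target and run-time detection of the `rdrand` feature:

```rust
#[cfg(target_arch = "x86_64")]
fn main() {
    use std::arch::x86_64::_rdrand32_step;

    if is_x86_feature_detected!("rdrand") {
        let mut val: u32 = 0;
        // The hardware may transiently report failure, so retry a few times.
        for _ in 0..10 {
            if unsafe { _rdrand32_step(&mut val) } == 1 {
                println!("hardware random value: {:#010x}", val);
                return;
            }
        }
        println!("rdrand gave no value after 10 attempts");
    } else {
        println!("rdrand not supported on this CPU");
    }
}

#[cfg(not(target_arch = "x86_64"))]
fn main() {}
```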
/// @@ -62,7 +62,7 @@ pub unsafe fn _mm_sha1nexte_epu32(a: __m128i, b: __m128i) -> __m128i { mem::transmute(sha1nexte(a.as_i32x4(), b.as_i32x4())) } -/// Perform four rounds of SHA1 operation using an initial SHA1 state (A,B,C,D) +/// Performs four rounds of SHA1 operation using an initial SHA1 state (A,B,C,D) /// from `a` and some pre-computed sum of the next 4 round message values /// (unsigned 32-bit integers), and state variable E from `b`, and return the /// updated SHA1 state (A,B,C,D). `func` contains the logic functions and round @@ -86,7 +86,7 @@ pub unsafe fn _mm_sha1rnds4_epu32(a: __m128i, b: __m128i, func: i32) -> __m128i mem::transmute(ret) } -/// Perform an intermediate calculation for the next four SHA256 message values +/// Performs an intermediate calculation for the next four SHA256 message values /// (unsigned 32-bit integers) using previous message values from `a` and `b`, /// and return the result. /// @@ -99,7 +99,7 @@ pub unsafe fn _mm_sha256msg1_epu32(a: __m128i, b: __m128i) -> __m128i { mem::transmute(sha256msg1(a.as_i32x4(), b.as_i32x4())) } -/// Perform the final calculation for the next four SHA256 message values +/// Performs the final calculation for the next four SHA256 message values /// (unsigned 32-bit integers) using previous message values from `a` and `b`, /// and return the result. /// @@ -112,7 +112,7 @@ pub unsafe fn _mm_sha256msg2_epu32(a: __m128i, b: __m128i) -> __m128i { mem::transmute(sha256msg2(a.as_i32x4(), b.as_i32x4())) } -/// Perform 2 rounds of SHA256 operation using an initial SHA256 state +/// Performs 2 rounds of SHA256 operation using an initial SHA256 state /// (C,D,G,H) from `a`, an initial SHA256 state (A,B,E,F) from `b`, and a /// pre-computed sum of the next 2 round message values (unsigned 32-bit /// integers) and the corresponding round constants from `k`, and store the diff --git a/library/stdarch/crates/core_arch/src/x86/sse.rs b/library/stdarch/crates/core_arch/src/x86/sse.rs index 22bdc118c2b..96ea08697c0 100644 --- a/library/stdarch/crates/core_arch/src/x86/sse.rs +++ b/library/stdarch/crates/core_arch/src/x86/sse.rs @@ -102,7 +102,7 @@ pub unsafe fn _mm_div_ps(a: __m128, b: __m128) -> __m128 { simd_div(a, b) } -/// Return the square root of the first single-precision (32-bit) +/// Returns the square root of the first single-precision (32-bit) /// floating-point element in `a`, the other elements are unchanged. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sqrt_ss) @@ -114,7 +114,7 @@ pub unsafe fn _mm_sqrt_ss(a: __m128) -> __m128 { sqrtss(a) } -/// Return the square root of packed single-precision (32-bit) floating-point +/// Returns the square root of packed single-precision (32-bit) floating-point /// elements in `a`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sqrt_ps) @@ -126,7 +126,7 @@ pub unsafe fn _mm_sqrt_ps(a: __m128) -> __m128 { sqrtps(a) } -/// Return the approximate reciprocal of the first single-precision +/// Returns the approximate reciprocal of the first single-precision /// (32-bit) floating-point element in `a`, the other elements are unchanged. 
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_rcp_ss) @@ -138,7 +138,7 @@ pub unsafe fn _mm_rcp_ss(a: __m128) -> __m128 { rcpss(a) } -/// Return the approximate reciprocal of packed single-precision (32-bit) +/// Returns the approximate reciprocal of packed single-precision (32-bit) /// floating-point elements in `a`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_rcp_ps) @@ -150,7 +150,7 @@ pub unsafe fn _mm_rcp_ps(a: __m128) -> __m128 { rcpps(a) } -/// Return the approximate reciprocal square root of the fist single-precision +/// Returns the approximate reciprocal square root of the fist single-precision /// (32-bit) floating-point elements in `a`, the other elements are unchanged. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_rsqrt_ss) @@ -162,7 +162,7 @@ pub unsafe fn _mm_rsqrt_ss(a: __m128) -> __m128 { rsqrtss(a) } -/// Return the approximate reciprocal square root of packed single-precision +/// Returns the approximate reciprocal square root of packed single-precision /// (32-bit) floating-point elements in `a`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_rsqrt_ps) @@ -174,7 +174,7 @@ pub unsafe fn _mm_rsqrt_ps(a: __m128) -> __m128 { rsqrtps(a) } -/// Compare the first single-precision (32-bit) floating-point element of `a` +/// Compares the first single-precision (32-bit) floating-point element of `a` /// and `b`, and return the minimum value in the first element of the return /// value, the other elements are copied from `a`. /// @@ -187,7 +187,7 @@ pub unsafe fn _mm_min_ss(a: __m128, b: __m128) -> __m128 { minss(a, b) } -/// Compare packed single-precision (32-bit) floating-point elements in `a` and +/// Compares packed single-precision (32-bit) floating-point elements in `a` and /// `b`, and return the corresponding minimum values. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_ps) @@ -199,7 +199,7 @@ pub unsafe fn _mm_min_ps(a: __m128, b: __m128) -> __m128 { minps(a, b) } -/// Compare the first single-precision (32-bit) floating-point element of `a` +/// Compares the first single-precision (32-bit) floating-point element of `a` /// and `b`, and return the maximum value in the first element of the return /// value, the other elements are copied from `a`. /// @@ -212,7 +212,7 @@ pub unsafe fn _mm_max_ss(a: __m128, b: __m128) -> __m128 { maxss(a, b) } -/// Compare packed single-precision (32-bit) floating-point elements in `a` and +/// Compares packed single-precision (32-bit) floating-point elements in `a` and /// `b`, and return the corresponding maximum values. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_ps) @@ -298,7 +298,7 @@ pub unsafe fn _mm_xor_ps(a: __m128, b: __m128) -> __m128 { mem::transmute(simd_xor(a, b)) } -/// Compare the lowest `f32` of both inputs for equality. The lowest 32 bits of +/// Compares the lowest `f32` of both inputs for equality. The lowest 32 bits of /// the result will be `0xffffffff` if the two inputs are equal, or `0` /// otherwise. The upper 96 bits of the result are the upper 96 bits of `a`. /// @@ -311,7 +311,7 @@ pub unsafe fn _mm_cmpeq_ss(a: __m128, b: __m128) -> __m128 { cmpss(a, b, 0) } -/// Compare the lowest `f32` of both inputs for less than. 
The lowest 32 bits +/// Compares the lowest `f32` of both inputs for less than. The lowest 32 bits /// of the result will be `0xffffffff` if `a.extract(0)` is less than /// `b.extract(0)`, or `0` otherwise. The upper 96 bits of the result are the /// upper 96 bits of `a`. @@ -325,7 +325,7 @@ pub unsafe fn _mm_cmplt_ss(a: __m128, b: __m128) -> __m128 { cmpss(a, b, 1) } -/// Compare the lowest `f32` of both inputs for less than or equal. The lowest +/// Compares the lowest `f32` of both inputs for less than or equal. The lowest /// 32 bits of the result will be `0xffffffff` if `a.extract(0)` is less than /// or equal `b.extract(0)`, or `0` otherwise. The upper 96 bits of the result /// are the upper 96 bits of `a`. @@ -339,7 +339,7 @@ pub unsafe fn _mm_cmple_ss(a: __m128, b: __m128) -> __m128 { cmpss(a, b, 2) } -/// Compare the lowest `f32` of both inputs for greater than. The lowest 32 +/// Compares the lowest `f32` of both inputs for greater than. The lowest 32 /// bits of the result will be `0xffffffff` if `a.extract(0)` is greater /// than `b.extract(0)`, or `0` otherwise. The upper 96 bits of the result /// are the upper 96 bits of `a`. @@ -353,7 +353,7 @@ pub unsafe fn _mm_cmpgt_ss(a: __m128, b: __m128) -> __m128 { simd_shuffle4(a, cmpss(b, a, 1), [4, 1, 2, 3]) } -/// Compare the lowest `f32` of both inputs for greater than or equal. The +/// Compares the lowest `f32` of both inputs for greater than or equal. The /// lowest 32 bits of the result will be `0xffffffff` if `a.extract(0)` is /// greater than or equal `b.extract(0)`, or `0` otherwise. The upper 96 bits /// of the result are the upper 96 bits of `a`. @@ -367,7 +367,7 @@ pub unsafe fn _mm_cmpge_ss(a: __m128, b: __m128) -> __m128 { simd_shuffle4(a, cmpss(b, a, 2), [4, 1, 2, 3]) } -/// Compare the lowest `f32` of both inputs for inequality. The lowest 32 bits +/// Compares the lowest `f32` of both inputs for inequality. The lowest 32 bits /// of the result will be `0xffffffff` if `a.extract(0)` is not equal to /// `b.extract(0)`, or `0` otherwise. The upper 96 bits of the result are the /// upper 96 bits of `a`. @@ -381,7 +381,7 @@ pub unsafe fn _mm_cmpneq_ss(a: __m128, b: __m128) -> __m128 { cmpss(a, b, 4) } -/// Compare the lowest `f32` of both inputs for not-less-than. The lowest 32 +/// Compares the lowest `f32` of both inputs for not-less-than. The lowest 32 /// bits of the result will be `0xffffffff` if `a.extract(0)` is not less than /// `b.extract(0)`, or `0` otherwise. The upper 96 bits of the result are the /// upper 96 bits of `a`. @@ -395,7 +395,7 @@ pub unsafe fn _mm_cmpnlt_ss(a: __m128, b: __m128) -> __m128 { cmpss(a, b, 5) } -/// Compare the lowest `f32` of both inputs for not-less-than-or-equal. The +/// Compares the lowest `f32` of both inputs for not-less-than-or-equal. The /// lowest 32 bits of the result will be `0xffffffff` if `a.extract(0)` is not /// less than or equal to `b.extract(0)`, or `0` otherwise. The upper 96 bits /// of the result are the upper 96 bits of `a`. @@ -409,7 +409,7 @@ pub unsafe fn _mm_cmpnle_ss(a: __m128, b: __m128) -> __m128 { cmpss(a, b, 6) } -/// Compare the lowest `f32` of both inputs for not-greater-than. The lowest 32 +/// Compares the lowest `f32` of both inputs for not-greater-than. The lowest 32 /// bits of the result will be `0xffffffff` if `a.extract(0)` is not greater /// than `b.extract(0)`, or `0` otherwise. The upper 96 bits of the result are /// the upper 96 bits of `a`. 
@@ -423,7 +423,7 @@ pub unsafe fn _mm_cmpngt_ss(a: __m128, b: __m128) -> __m128 { simd_shuffle4(a, cmpss(b, a, 5), [4, 1, 2, 3]) } -/// Compare the lowest `f32` of both inputs for not-greater-than-or-equal. The +/// Compares the lowest `f32` of both inputs for not-greater-than-or-equal. The /// lowest 32 bits of the result will be `0xffffffff` if `a.extract(0)` is not /// greater than or equal to `b.extract(0)`, or `0` otherwise. The upper 96 /// bits of the result are the upper 96 bits of `a`. @@ -437,7 +437,7 @@ pub unsafe fn _mm_cmpnge_ss(a: __m128, b: __m128) -> __m128 { simd_shuffle4(a, cmpss(b, a, 6), [4, 1, 2, 3]) } -/// Check if the lowest `f32` of both inputs are ordered. The lowest 32 bits of +/// Checks if the lowest `f32` of both inputs are ordered. The lowest 32 bits of /// the result will be `0xffffffff` if neither of `a.extract(0)` or /// `b.extract(0)` is a NaN, or `0` otherwise. The upper 96 bits of the result /// are the upper 96 bits of `a`. @@ -451,7 +451,7 @@ pub unsafe fn _mm_cmpord_ss(a: __m128, b: __m128) -> __m128 { cmpss(a, b, 7) } -/// Check if the lowest `f32` of both inputs are unordered. The lowest 32 bits +/// Checks if the lowest `f32` of both inputs are unordered. The lowest 32 bits /// of the result will be `0xffffffff` if any of `a.extract(0)` or /// `b.extract(0)` is a NaN, or `0` otherwise. The upper 96 bits of the result /// are the upper 96 bits of `a`. @@ -465,7 +465,7 @@ pub unsafe fn _mm_cmpunord_ss(a: __m128, b: __m128) -> __m128 { cmpss(a, b, 3) } -/// Compare each of the four floats in `a` to the corresponding element in `b`. +/// Compares each of the four floats in `a` to the corresponding element in `b`. /// The result in the output vector will be `0xffffffff` if the input elements /// were equal, or `0` otherwise. /// @@ -478,7 +478,7 @@ pub unsafe fn _mm_cmpeq_ps(a: __m128, b: __m128) -> __m128 { cmpps(a, b, 0) } -/// Compare each of the four floats in `a` to the corresponding element in `b`. +/// Compares each of the four floats in `a` to the corresponding element in `b`. /// The result in the output vector will be `0xffffffff` if the input element /// in `a` is less than the corresponding element in `b`, or `0` otherwise. /// @@ -491,7 +491,7 @@ pub unsafe fn _mm_cmplt_ps(a: __m128, b: __m128) -> __m128 { cmpps(a, b, 1) } -/// Compare each of the four floats in `a` to the corresponding element in `b`. +/// Compares each of the four floats in `a` to the corresponding element in `b`. /// The result in the output vector will be `0xffffffff` if the input element /// in `a` is less than or equal to the corresponding element in `b`, or `0` /// otherwise. @@ -505,7 +505,7 @@ pub unsafe fn _mm_cmple_ps(a: __m128, b: __m128) -> __m128 { cmpps(a, b, 2) } -/// Compare each of the four floats in `a` to the corresponding element in `b`. +/// Compares each of the four floats in `a` to the corresponding element in `b`. /// The result in the output vector will be `0xffffffff` if the input element /// in `a` is greater than the corresponding element in `b`, or `0` otherwise. /// @@ -518,7 +518,7 @@ pub unsafe fn _mm_cmpgt_ps(a: __m128, b: __m128) -> __m128 { cmpps(b, a, 1) } -/// Compare each of the four floats in `a` to the corresponding element in `b`. +/// Compares each of the four floats in `a` to the corresponding element in `b`. /// The result in the output vector will be `0xffffffff` if the input element /// in `a` is greater than or equal to the corresponding element in `b`, or `0` /// otherwise. 
@@ -532,9 +532,9 @@ pub unsafe fn _mm_cmpge_ps(a: __m128, b: __m128) -> __m128 { cmpps(b, a, 2) } -/// Compare each of the four floats in `a` to the corresponding element in `b`. +/// Compares each of the four floats in `a` to the corresponding element in `b`. /// The result in the output vector will be `0xffffffff` if the input elements -/// are *not* equal, or `0` otherwise. +/// are **not** equal, or `0` otherwise. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpneq_ps) #[inline] @@ -545,9 +545,9 @@ pub unsafe fn _mm_cmpneq_ps(a: __m128, b: __m128) -> __m128 { cmpps(a, b, 4) } -/// Compare each of the four floats in `a` to the corresponding element in `b`. +/// Compares each of the four floats in `a` to the corresponding element in `b`. /// The result in the output vector will be `0xffffffff` if the input element -/// in `a` is *not* less than the corresponding element in `b`, or `0` +/// in `a` is **not** less than the corresponding element in `b`, or `0` /// otherwise. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnlt_ps) @@ -559,9 +559,9 @@ pub unsafe fn _mm_cmpnlt_ps(a: __m128, b: __m128) -> __m128 { cmpps(a, b, 5) } -/// Compare each of the four floats in `a` to the corresponding element in `b`. +/// Compares each of the four floats in `a` to the corresponding element in `b`. /// The result in the output vector will be `0xffffffff` if the input element -/// in `a` is *not* less than or equal to the corresponding element in `b`, or +/// in `a` is **not** less than or equal to the corresponding element in `b`, or /// `0` otherwise. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnle_ps) @@ -573,9 +573,9 @@ pub unsafe fn _mm_cmpnle_ps(a: __m128, b: __m128) -> __m128 { cmpps(a, b, 6) } -/// Compare each of the four floats in `a` to the corresponding element in `b`. +/// Compares each of the four floats in `a` to the corresponding element in `b`. /// The result in the output vector will be `0xffffffff` if the input element -/// in `a` is *not* greater than the corresponding element in `b`, or `0` +/// in `a` is **not** greater than the corresponding element in `b`, or `0` /// otherwise. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpngt_ps) @@ -587,9 +587,9 @@ pub unsafe fn _mm_cmpngt_ps(a: __m128, b: __m128) -> __m128 { cmpps(b, a, 5) } -/// Compare each of the four floats in `a` to the corresponding element in `b`. +/// Compares each of the four floats in `a` to the corresponding element in `b`. /// The result in the output vector will be `0xffffffff` if the input element -/// in `a` is *not* greater than or equal to the corresponding element in `b`, +/// in `a` is **not** greater than or equal to the corresponding element in `b`, /// or `0` otherwise. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnge_ps) @@ -601,7 +601,7 @@ pub unsafe fn _mm_cmpnge_ps(a: __m128, b: __m128) -> __m128 { cmpps(b, a, 6) } -/// Compare each of the four floats in `a` to the corresponding element in `b`. +/// Compares each of the four floats in `a` to the corresponding element in `b`. /// Returns four floats that have one of two possible bit patterns. The element /// in the output vector will be `0xffffffff` if the input elements in `a` and /// `b` are ordered (i.e., neither of them is a NaN), or 0 otherwise. 
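The packed comparisons above all produce per-lane masks of all-ones or all-zeros; a common next step is `_mm_movemask_ps`, which collapses those lanes into an ordinary bitmask. A sketch, assuming an x86_64 target; the `_mm_setr_ps` constructor and the chosen values are only for illustration:

```rust
#[cfg(target_arch = "x86_64")]
fn main() {
    if is_x86_feature_detected!("sse") {
        unsafe {
            use std::arch::x86_64::*;
            let a = _mm_setr_ps(1.0, 5.0, 3.0, f32::NAN);
            let b = _mm_setr_ps(2.0, 2.0, 3.0, 4.0);

            // Each lane is 0xffffffff where a[i] < b[i] and 0 otherwise;
            // comparisons involving NaN come out false.
            let lt = _mm_cmplt_ps(a, b);

            // Pack the sign bit of every lane into the low 4 bits.
            let mask = _mm_movemask_ps(lt);
            assert_eq!(mask, 0b0001); // only lane 0 satisfies a < b
        }
    }
}

#[cfg(not(target_arch = "x86_64"))]
fn main() {}
```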
@@ -615,7 +615,7 @@ pub unsafe fn _mm_cmpord_ps(a: __m128, b: __m128) -> __m128 { cmpps(b, a, 7) } -/// Compare each of the four floats in `a` to the corresponding element in `b`. +/// Compares each of the four floats in `a` to the corresponding element in `b`. /// Returns four floats that have one of two possible bit patterns. The element /// in the output vector will be `0xffffffff` if the input elements in `a` and /// `b` are unordered (i.e., at least on of them is a NaN), or 0 otherwise. @@ -629,7 +629,7 @@ pub unsafe fn _mm_cmpunord_ps(a: __m128, b: __m128) -> __m128 { cmpps(b, a, 3) } -/// Compare two 32-bit floats from the low-order bits of `a` and `b`. Returns +/// Compares two 32-bit floats from the low-order bits of `a` and `b`. Returns /// `1` if they are equal, or `0` otherwise. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comieq_ss) @@ -641,7 +641,7 @@ pub unsafe fn _mm_comieq_ss(a: __m128, b: __m128) -> i32 { comieq_ss(a, b) } -/// Compare two 32-bit floats from the low-order bits of `a` and `b`. Returns +/// Compares two 32-bit floats from the low-order bits of `a` and `b`. Returns /// `1` if the value from `a` is less than the one from `b`, or `0` otherwise. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comilt_ss) @@ -653,7 +653,7 @@ pub unsafe fn _mm_comilt_ss(a: __m128, b: __m128) -> i32 { comilt_ss(a, b) } -/// Compare two 32-bit floats from the low-order bits of `a` and `b`. Returns +/// Compares two 32-bit floats from the low-order bits of `a` and `b`. Returns /// `1` if the value from `a` is less than or equal to the one from `b`, or `0` /// otherwise. /// @@ -666,7 +666,7 @@ pub unsafe fn _mm_comile_ss(a: __m128, b: __m128) -> i32 { comile_ss(a, b) } -/// Compare two 32-bit floats from the low-order bits of `a` and `b`. Returns +/// Compares two 32-bit floats from the low-order bits of `a` and `b`. Returns /// `1` if the value from `a` is greater than the one from `b`, or `0` /// otherwise. /// @@ -679,7 +679,7 @@ pub unsafe fn _mm_comigt_ss(a: __m128, b: __m128) -> i32 { comigt_ss(a, b) } -/// Compare two 32-bit floats from the low-order bits of `a` and `b`. Returns +/// Compares two 32-bit floats from the low-order bits of `a` and `b`. Returns /// `1` if the value from `a` is greater than or equal to the one from `b`, or /// `0` otherwise. /// @@ -692,8 +692,8 @@ pub unsafe fn _mm_comige_ss(a: __m128, b: __m128) -> i32 { comige_ss(a, b) } -/// Compare two 32-bit floats from the low-order bits of `a` and `b`. Returns -/// `1` if they are *not* equal, or `0` otherwise. +/// Compares two 32-bit floats from the low-order bits of `a` and `b`. Returns +/// `1` if they are **not** equal, or `0` otherwise. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comineq_ss) #[inline] @@ -704,7 +704,7 @@ pub unsafe fn _mm_comineq_ss(a: __m128, b: __m128) -> i32 { comineq_ss(a, b) } -/// Compare two 32-bit floats from the low-order bits of `a` and `b`. Returns +/// Compares two 32-bit floats from the low-order bits of `a` and `b`. Returns /// `1` if they are equal, or `0` otherwise. This instruction will not signal /// an exception if either argument is a quiet NaN. /// @@ -717,7 +717,7 @@ pub unsafe fn _mm_ucomieq_ss(a: __m128, b: __m128) -> i32 { ucomieq_ss(a, b) } -/// Compare two 32-bit floats from the low-order bits of `a` and `b`. Returns +/// Compares two 32-bit floats from the low-order bits of `a` and `b`. 
Returns /// `1` if the value from `a` is less than the one from `b`, or `0` otherwise. /// This instruction will not signal an exception if either argument is a quiet /// NaN. @@ -731,7 +731,7 @@ pub unsafe fn _mm_ucomilt_ss(a: __m128, b: __m128) -> i32 { ucomilt_ss(a, b) } -/// Compare two 32-bit floats from the low-order bits of `a` and `b`. Returns +/// Compares two 32-bit floats from the low-order bits of `a` and `b`. Returns /// `1` if the value from `a` is less than or equal to the one from `b`, or `0` /// otherwise. This instruction will not signal an exception if either argument /// is a quiet NaN. @@ -745,7 +745,7 @@ pub unsafe fn _mm_ucomile_ss(a: __m128, b: __m128) -> i32 { ucomile_ss(a, b) } -/// Compare two 32-bit floats from the low-order bits of `a` and `b`. Returns +/// Compares two 32-bit floats from the low-order bits of `a` and `b`. Returns /// `1` if the value from `a` is greater than the one from `b`, or `0` /// otherwise. This instruction will not signal an exception if either argument /// is a quiet NaN. @@ -759,7 +759,7 @@ pub unsafe fn _mm_ucomigt_ss(a: __m128, b: __m128) -> i32 { ucomigt_ss(a, b) } -/// Compare two 32-bit floats from the low-order bits of `a` and `b`. Returns +/// Compares two 32-bit floats from the low-order bits of `a` and `b`. Returns /// `1` if the value from `a` is greater than or equal to the one from `b`, or /// `0` otherwise. This instruction will not signal an exception if either /// argument is a quiet NaN. @@ -773,8 +773,8 @@ pub unsafe fn _mm_ucomige_ss(a: __m128, b: __m128) -> i32 { ucomige_ss(a, b) } -/// Compare two 32-bit floats from the low-order bits of `a` and `b`. Returns -/// `1` if they are *not* equal, or `0` otherwise. This instruction will not +/// Compares two 32-bit floats from the low-order bits of `a` and `b`. Returns +/// `1` if they are **not** equal, or `0` otherwise. This instruction will not /// signal an exception if either argument is a quiet NaN. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomineq_ss) @@ -786,7 +786,7 @@ pub unsafe fn _mm_ucomineq_ss(a: __m128, b: __m128) -> i32 { ucomineq_ss(a, b) } -/// Convert the lowest 32 bit float in the input vector to a 32 bit integer. +/// Converts the lowest 32 bit float in the input vector to a 32 bit integer. /// /// The result is rounded according to the current rounding mode. If the result /// cannot be represented as a 32 bit integer the result will be `0x8000_0000` @@ -815,7 +815,7 @@ pub unsafe fn _mm_cvt_ss2si(a: __m128) -> i32 { _mm_cvtss_si32(a) } -/// Convert the lowest 32 bit float in the input vector to a 32 bit integer +/// Converts the lowest 32 bit float in the input vector to a 32 bit integer /// with /// truncation. /// @@ -846,7 +846,7 @@ pub unsafe fn _mm_cvtt_ss2si(a: __m128) -> i32 { _mm_cvttss_si32(a) } -/// Extract the lowest 32 bit float from the input vector. +/// Extracts the lowest 32 bit float from the input vector. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtss_f32) #[inline] @@ -858,7 +858,7 @@ pub unsafe fn _mm_cvtss_f32(a: __m128) -> f32 { simd_extract(a, 0) } -/// Convert a 32 bit integer to a 32 bit float. The result vector is the input +/// Converts a 32 bit integer to a 32 bit float. The result vector is the input /// vector `a` with the lowest 32 bit float replaced by the converted integer. 
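The distinction drawn above between the rounding and the truncating conversion is easiest to see on a value like 2.7. A sketch, assuming an x86_64 target and that the MXCSR register is still in its default round-to-nearest state:

```rust
#[cfg(target_arch = "x86_64")]
fn main() {
    if is_x86_feature_detected!("sse") {
        unsafe {
            use std::arch::x86_64::*;
            let a = _mm_setr_ps(2.7, 0.0, 0.0, 0.0);

            assert_eq!(_mm_cvtss_f32(a), 2.7);  // extract the lowest f32 as-is
            assert_eq!(_mm_cvtss_si32(a), 3);   // rounds to nearest by default
            assert_eq!(_mm_cvttss_si32(a), 2);  // truncates toward zero
        }
    }
}

#[cfg(not(target_arch = "x86_64"))]
fn main() {}
```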
/// /// This intrinsic corresponds to the `CVTSI2SS` instruction (with 32 bit @@ -985,7 +985,7 @@ pub const fn _MM_SHUFFLE(z: u32, y: u32, x: u32, w: u32) -> i32 { ((z << 6) | (y << 4) | (x << 2) | w) as i32 } -/// Shuffle packed single-precision (32-bit) floating-point elements in `a` and +/// Shuffles packed single-precision (32-bit) floating-point elements in `a` and /// `b` using `mask`. /// /// The lower half of result takes values from `a` and the higher half from @@ -1043,7 +1043,7 @@ pub unsafe fn _mm_shuffle_ps(a: __m128, b: __m128, mask: u32) -> __m128 { } } -/// Unpack and interleave single-precision (32-bit) floating-point elements +/// Unpacks and interleave single-precision (32-bit) floating-point elements /// from the higher half of `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpackhi_ps) @@ -1055,7 +1055,7 @@ pub unsafe fn _mm_unpackhi_ps(a: __m128, b: __m128) -> __m128 { simd_shuffle4(a, b, [2, 6, 3, 7]) } -/// Unpack and interleave single-precision (32-bit) floating-point elements +/// Unpacks and interleave single-precision (32-bit) floating-point elements /// from the lower half of `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpacklo_ps) @@ -1092,7 +1092,7 @@ pub unsafe fn _mm_movelh_ps(a: __m128, b: __m128) -> __m128 { simd_shuffle4(a, b, [0, 1, 4, 5]) } -/// Return a mask of the most significant bit of each element in `a`. +/// Returns a mask of the most significant bit of each element in `a`. /// /// The mask is stored in the 4 least significant bits of the return value. /// All other bits are set to `0`. @@ -1106,7 +1106,7 @@ pub unsafe fn _mm_movemask_ps(a: __m128) -> i32 { movmskps(a) } -/// Set the upper two single-precision floating-point values with 64 bits of +/// Sets the upper two single-precision floating-point values with 64 bits of /// data loaded from the address `p`; the lower two values are passed through /// from `a`. #[inline] @@ -1135,7 +1135,7 @@ pub unsafe fn _mm_loadh_pi(a: __m128, p: *const __m64) -> __m128 { simd_shuffle4(a, bb, [0, 1, 4, 5]) } -/// Load two floats from `p` into the lower half of a `__m128`. The upper half +/// Loads two floats from `p` into the lower half of a `__m128`. The upper half /// is copied from the upper half of `a`. #[inline] #[target_feature(enable = "sse")] @@ -1197,7 +1197,7 @@ pub unsafe fn _mm_load_ps1(p: *const f32) -> __m128 { _mm_load1_ps(p) } -/// Load four `f32` values from *aligned* memory into a `__m128`. If the +/// Loads four `f32` values from *aligned* memory into a `__m128`. If the /// pointer is not aligned to a 128-bit boundary (16 bytes) a general /// protection fault will be triggered (fatal program crash). /// @@ -1216,7 +1216,7 @@ pub unsafe fn _mm_load_ps(p: *const f32) -> __m128 { *(p as *const __m128) } -/// Load four `f32` values from memory into a `__m128`. There are no +/// Loads four `f32` values from memory into a `__m128`. There are no /// restrictions /// on memory alignment. For aligned memory /// [`_mm_load_ps`](fn._mm_load_ps.html) @@ -1241,7 +1241,7 @@ pub unsafe fn _mm_loadu_ps(p: *const f32) -> __m128 { dst } -/// Load four `f32` values from aligned memory into a `__m128` in reverse +/// Loads four `f32` values from aligned memory into a `__m128` in reverse /// order. 
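The aligned loads and stores in this file fault on a misaligned pointer, while the `u`-suffixed variants accept any address. A small round-trip sketch with the unaligned pair, assuming an x86_64 target:

```rust
#[cfg(target_arch = "x86_64")]
fn main() {
    if is_x86_feature_detected!("sse") {
        unsafe {
            use std::arch::x86_64::*;
            let src = [1.0f32, 2.0, 3.0, 4.0];
            let mut dst = [0.0f32; 4];

            let v = _mm_loadu_ps(src.as_ptr());  // no alignment requirement
            _mm_storeu_ps(dst.as_mut_ptr(), v);  // no alignment requirement
            assert_eq!(dst, src);
        }
    }
}

#[cfg(not(target_arch = "x86_64"))]
fn main() {}
```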
/// /// If the pointer is not aligned to a 128-bit boundary (16 bytes) a general @@ -1271,7 +1271,7 @@ pub unsafe fn _mm_loadr_ps(p: *const f32) -> __m128 { simd_shuffle4(a, a, [3, 2, 1, 0]) } -/// Store the upper half of `a` (64 bits) into memory. +/// Stores the upper half of `a` (64 bits) into memory. /// /// This intrinsic corresponds to the `MOVHPS` instruction. The compiler may /// choose to generate an equivalent sequence of other instructions. @@ -1305,7 +1305,7 @@ pub unsafe fn _mm_storeh_pi(p: *mut __m64, a: __m128) { } } -/// Store the lower half of `a` (64 bits) into memory. +/// Stores the lower half of `a` (64 bits) into memory. /// /// This intrinsic corresponds to the `MOVQ` instruction. The compiler may /// choose to generate an equivalent sequence of other instructions. @@ -1337,7 +1337,7 @@ pub unsafe fn _mm_storel_pi(p: *mut __m64, a: __m128) { } } -/// Store the lowest 32 bit float of `a` into memory. +/// Stores the lowest 32 bit float of `a` into memory. /// /// This intrinsic corresponds to the `MOVSS` instruction. /// @@ -1350,7 +1350,7 @@ pub unsafe fn _mm_store_ss(p: *mut f32, a: __m128) { *p = simd_extract(a, 0); } -/// Store the lowest 32 bit float of `a` repeated four times into *aligned* +/// Stores the lowest 32 bit float of `a` repeated four times into *aligned* /// memory. /// /// If the pointer is not aligned to a 128-bit boundary (16 bytes) a general @@ -1389,7 +1389,7 @@ pub unsafe fn _mm_store_ps1(p: *mut f32, a: __m128) { _mm_store1_ps(p, a); } -/// Store four 32-bit floats into *aligned* memory. +/// Stores four 32-bit floats into *aligned* memory. /// /// If the pointer is not aligned to a 128-bit boundary (16 bytes) a general /// protection fault will be triggered (fatal program crash). @@ -1409,7 +1409,7 @@ pub unsafe fn _mm_store_ps(p: *mut f32, a: __m128) { *(p as *mut __m128) = a; } -/// Store four 32-bit floats into memory. There are no restrictions on memory +/// Stores four 32-bit floats into memory. There are no restrictions on memory /// alignment. For aligned memory [`_mm_store_ps`](fn._mm_store_ps.html) may be /// faster. /// @@ -1428,7 +1428,7 @@ pub unsafe fn _mm_storeu_ps(p: *mut f32, a: __m128) { ); } -/// Store four 32-bit floats into *aligned* memory in reverse order. +/// Stores four 32-bit floats into *aligned* memory in reverse order. /// /// If the pointer is not aligned to a 128-bit boundary (16 bytes) a general /// protection fault will be triggered (fatal program crash). @@ -1454,7 +1454,7 @@ pub unsafe fn _mm_storer_ps(p: *mut f32, a: __m128) { *(p as *mut __m128) = b; } -/// Return a `__m128` with the first component from `b` and the remaining +/// Returns a `__m128` with the first component from `b` and the remaining /// components from `a`. /// /// In other words for any `a` and `b`: @@ -1471,7 +1471,7 @@ pub unsafe fn _mm_move_ss(a: __m128, b: __m128) -> __m128 { simd_shuffle4(a, b, [4, 1, 2, 3]) } -/// Perform a serializing operation on all store-to-memory instructions that +/// Performs a serializing operation on all store-to-memory instructions that /// were issued prior to this instruction. /// /// Guarantees that every store instruction that precedes, in program order, is @@ -1487,7 +1487,7 @@ pub unsafe fn _mm_sfence() { sfence() } -/// Get the unsigned 32-bit value of the MXCSR control and status register. +/// Gets the unsigned 32-bit value of the MXCSR control and status register. 
/// /// For more info see [`_mm_setcsr`](fn._mm_setcsr.html) /// @@ -1502,7 +1502,7 @@ pub unsafe fn _mm_getcsr() -> u32 { result as u32 } -/// Set the MXCSR register with the 32-bit unsigned integer value. +/// Sets the MXCSR register with the 32-bit unsigned integer value. /// /// This register constrols how SIMD instructions handle floating point /// operations. Modifying this register only affects the current thread. @@ -1878,7 +1878,7 @@ pub unsafe fn _mm_prefetch(p: *const i8, strategy: i32) { pref!(strategy) } -/// Return vector of type __m128 with undefined elements. +/// Returns vector of type __m128 with undefined elements. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_undefined_ps) #[inline] @@ -2040,7 +2040,7 @@ pub unsafe fn _mm_stream_ps(mem_addr: *mut f32, a: __m128) { intrinsics::nontemporal_store(mem_addr as *mut __m128, a); } -/// Store 64-bits of integer data from a into memory using a non-temporal +/// Stores 64-bits of integer data from a into memory using a non-temporal /// memory hint. #[inline] #[target_feature(enable = "sse,mmx")] @@ -2437,7 +2437,7 @@ pub unsafe fn _m_pshufw(a: __m64, imm8: i32) -> __m64 { constify_imm8!(imm8, call) } -/// Convert the two lower packed single-precision (32-bit) floating-point +/// Converts the two lower packed single-precision (32-bit) floating-point /// elements in `a` to packed 32-bit integers with truncation. #[inline] #[target_feature(enable = "sse,mmx")] @@ -2446,7 +2446,7 @@ pub unsafe fn _mm_cvttps_pi32(a: __m128) -> __m64 { cvttps2pi(a) } -/// Convert the two lower packed single-precision (32-bit) floating-point +/// Converts the two lower packed single-precision (32-bit) floating-point /// elements in `a` to packed 32-bit integers with truncation. #[inline] #[target_feature(enable = "sse,mmx")] @@ -2455,7 +2455,7 @@ pub unsafe fn _mm_cvtt_ps2pi(a: __m128) -> __m64 { _mm_cvttps_pi32(a) } -/// Convert the two lower packed single-precision (32-bit) floating-point +/// Converts the two lower packed single-precision (32-bit) floating-point /// elements in `a` to packed 32-bit integers. #[inline] #[target_feature(enable = "sse,mmx")] @@ -2464,7 +2464,7 @@ pub unsafe fn _mm_cvtps_pi32(a: __m128) -> __m64 { cvtps2pi(a) } -/// Convert the two lower packed single-precision (32-bit) floating-point +/// Converts the two lower packed single-precision (32-bit) floating-point /// elements in `a` to packed 32-bit integers. #[inline] #[target_feature(enable = "sse,mmx")] @@ -2473,7 +2473,7 @@ pub unsafe fn _mm_cvt_ps2pi(a: __m128) -> __m64 { _mm_cvtps_pi32(a) } -/// Convert packed single-precision (32-bit) floating-point elements in `a` to +/// Converts packed single-precision (32-bit) floating-point elements in `a` to /// packed 16-bit integers. #[inline] #[target_feature(enable = "sse,mmx")] @@ -2485,7 +2485,7 @@ pub unsafe fn _mm_cvtps_pi16(a: __m128) -> __m64 { _mm_packs_pi32(b, c) } -/// Convert packed single-precision (32-bit) floating-point elements in `a` to +/// Converts packed single-precision (32-bit) floating-point elements in `a` to /// packed 8-bit integers, and returns theem in the lower 4 elements of the /// result. 
#[inline] @@ -3784,7 +3784,7 @@ mod tests { let mut ofs = 0; let mut p = vals.as_mut_ptr(); - // Make sure p is *not* aligned to 16-byte boundary + // Make sure p is **not** aligned to 16-byte boundary if (p as usize) & 0xf == 0 { ofs = 1; p = p.offset(1); diff --git a/library/stdarch/crates/core_arch/src/x86/sse2.rs b/library/stdarch/crates/core_arch/src/x86/sse2.rs index d8700e538ed..df438f6547c 100644 --- a/library/stdarch/crates/core_arch/src/x86/sse2.rs +++ b/library/stdarch/crates/core_arch/src/x86/sse2.rs @@ -10,7 +10,7 @@ use intrinsics; use mem; use ptr; -/// Provide a hint to the processor that the code sequence is a spin-wait loop. +/// Provides a hint to the processor that the code sequence is a spin-wait loop. /// /// This can help improve the performance and power consumption of spin-wait /// loops. @@ -24,7 +24,7 @@ pub unsafe fn _mm_pause() { pause() } -/// Invalidate and flush the cache line that contains `p` from all levels of +/// Invalidates and flushes the cache line that contains `p` from all levels of /// the cache hierarchy. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_clflush) @@ -36,7 +36,7 @@ pub unsafe fn _mm_clflush(p: *mut u8) { clflush(p) } -/// Perform a serializing operation on all load-from-memory instructions +/// Performs a serializing operation on all load-from-memory instructions /// that were issued prior to this instruction. /// /// Guarantees that every load instruction that precedes, in program order, is @@ -52,7 +52,7 @@ pub unsafe fn _mm_lfence() { lfence() } -/// Perform a serializing operation on all load-from-memory and store-to-memory +/// Performs a serializing operation on all load-from-memory and store-to-memory /// instructions that were issued prior to this instruction. /// /// Guarantees that every memory access that precedes, in program order, the @@ -68,7 +68,7 @@ pub unsafe fn _mm_mfence() { mfence() } -/// Add packed 8-bit integers in `a` and `b`. +/// Adds packed 8-bit integers in `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_epi8) #[inline] @@ -79,7 +79,7 @@ pub unsafe fn _mm_add_epi8(a: __m128i, b: __m128i) -> __m128i { mem::transmute(simd_add(a.as_i8x16(), b.as_i8x16())) } -/// Add packed 16-bit integers in `a` and `b`. +/// Adds packed 16-bit integers in `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_epi16) #[inline] @@ -90,7 +90,7 @@ pub unsafe fn _mm_add_epi16(a: __m128i, b: __m128i) -> __m128i { mem::transmute(simd_add(a.as_i16x8(), b.as_i16x8())) } -/// Add packed 32-bit integers in `a` and `b`. +/// Adds packed 32-bit integers in `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_epi32) #[inline] @@ -101,7 +101,7 @@ pub unsafe fn _mm_add_epi32(a: __m128i, b: __m128i) -> __m128i { mem::transmute(simd_add(a.as_i32x4(), b.as_i32x4())) } -/// Add packed 64-bit integers in `a` and "b`. +/// Adds packed 64-bit integers in `a` and "b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_epi64) #[inline] @@ -112,7 +112,7 @@ pub unsafe fn _mm_add_epi64(a: __m128i, b: __m128i) -> __m128i { mem::transmute(simd_add(a.as_i64x2(), b.as_i64x2())) } -/// Add packed 8-bit integers in `a` and `b` using saturation. +/// Adds packed 8-bit integers in `a` and `b` using saturation. 
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_adds_epi8) #[inline] @@ -123,7 +123,7 @@ pub unsafe fn _mm_adds_epi8(a: __m128i, b: __m128i) -> __m128i { mem::transmute(paddsb(a.as_i8x16(), b.as_i8x16())) } -/// Add packed 16-bit integers in `a` and `b` using saturation. +/// Adds packed 16-bit integers in `a` and `b` using saturation. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_adds_epi16) #[inline] @@ -134,7 +134,7 @@ pub unsafe fn _mm_adds_epi16(a: __m128i, b: __m128i) -> __m128i { mem::transmute(paddsw(a.as_i16x8(), b.as_i16x8())) } -/// Add packed unsigned 8-bit integers in `a` and `b` using saturation. +/// Adds packed unsigned 8-bit integers in `a` and `b` using saturation. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_adds_epu8) #[inline] @@ -145,7 +145,7 @@ pub unsafe fn _mm_adds_epu8(a: __m128i, b: __m128i) -> __m128i { mem::transmute(paddsub(a.as_u8x16(), b.as_u8x16())) } -/// Add packed unsigned 16-bit integers in `a` and `b` using saturation. +/// Adds packed unsigned 16-bit integers in `a` and `b` using saturation. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_adds_epu16) #[inline] @@ -156,7 +156,7 @@ pub unsafe fn _mm_adds_epu16(a: __m128i, b: __m128i) -> __m128i { mem::transmute(paddsuw(a.as_u16x8(), b.as_u16x8())) } -/// Average packed unsigned 8-bit integers in `a` and `b`. +/// Averages packed unsigned 8-bit integers in `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_avg_epu8) #[inline] @@ -167,7 +167,7 @@ pub unsafe fn _mm_avg_epu8(a: __m128i, b: __m128i) -> __m128i { mem::transmute(pavgb(a.as_u8x16(), b.as_u8x16())) } -/// Average packed unsigned 16-bit integers in `a` and `b`. +/// Averages packed unsigned 16-bit integers in `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_avg_epu16) #[inline] @@ -178,9 +178,9 @@ pub unsafe fn _mm_avg_epu16(a: __m128i, b: __m128i) -> __m128i { mem::transmute(pavgw(a.as_u16x8(), b.as_u16x8())) } -/// Multiply and then horizontally add signed 16 bit integers in `a` and `b`. +/// Multiplies and then horizontally add signed 16 bit integers in `a` and `b`. /// -/// Multiply packed signed 16-bit integers in `a` and `b`, producing +/// Multiplies packed signed 16-bit integers in `a` and `b`, producing /// intermediate signed 32-bit integers. Horizontally add adjacent pairs of /// intermediate 32-bit integers. /// @@ -193,7 +193,7 @@ pub unsafe fn _mm_madd_epi16(a: __m128i, b: __m128i) -> __m128i { mem::transmute(pmaddwd(a.as_i16x8(), b.as_i16x8())) } -/// Compare packed 16-bit integers in `a` and `b`, and return the packed +/// Compares packed 16-bit integers in `a` and `b`, and returns the packed /// maximum values. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_epi16) @@ -205,7 +205,7 @@ pub unsafe fn _mm_max_epi16(a: __m128i, b: __m128i) -> __m128i { mem::transmute(pmaxsw(a.as_i16x8(), b.as_i16x8())) } -/// Compare packed unsigned 8-bit integers in `a` and `b`, and return the +/// Compares packed unsigned 8-bit integers in `a` and `b`, and returns the /// packed maximum values. 
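// Illustrative sketch (not part of the patch): behaviour of the saturating-add
// and multiply-add intrinsics documented above. The helper name is made up;
// it assumes an x86_64 target, where SSE2 is part of the baseline feature set.
#[cfg(target_arch = "x86_64")]
fn adds_madd_demo() {
    use core::arch::x86_64::*;
    unsafe {
        // Unsigned saturation: 250 + 10 clamps to 255 instead of wrapping to 4.
        let a = _mm_set1_epi8(250u8 as i8);
        let b = _mm_set1_epi8(10);
        let sum = _mm_adds_epu8(a, b);
        let mut out = [0u8; 16];
        _mm_storeu_si128(out.as_mut_ptr() as *mut __m128i, sum);
        assert!(out.iter().all(|&x| x == 255));

        // _mm_madd_epi16: each i32 lane is a[2i]*b[2i] + a[2i+1]*b[2i+1].
        let x = _mm_setr_epi16(1, 2, 3, 4, 1, 2, 3, 4);
        let y = _mm_setr_epi16(5, 6, 7, 8, 5, 6, 7, 8);
        let prod = _mm_madd_epi16(x, y);
        let mut acc = [0i32; 4];
        _mm_storeu_si128(acc.as_mut_ptr() as *mut __m128i, prod);
        assert_eq!(acc, [17, 53, 17, 53]); // 1*5+2*6, 3*7+4*8, ...
    }
}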
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_epu8) @@ -217,7 +217,7 @@ pub unsafe fn _mm_max_epu8(a: __m128i, b: __m128i) -> __m128i { mem::transmute(pmaxub(a.as_u8x16(), b.as_u8x16())) } -/// Compare packed 16-bit integers in `a` and `b`, and return the packed +/// Compares packed 16-bit integers in `a` and `b`, and returns the packed /// minimum values. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_epi16) @@ -229,7 +229,7 @@ pub unsafe fn _mm_min_epi16(a: __m128i, b: __m128i) -> __m128i { mem::transmute(pminsw(a.as_i16x8(), b.as_i16x8())) } -/// Compare packed unsigned 8-bit integers in `a` and `b`, and return the +/// Compares packed unsigned 8-bit integers in `a` and `b`, and returns the /// packed minimum values. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_epu8) @@ -241,7 +241,7 @@ pub unsafe fn _mm_min_epu8(a: __m128i, b: __m128i) -> __m128i { mem::transmute(pminub(a.as_u8x16(), b.as_u8x16())) } -/// Multiply the packed 16-bit integers in `a` and `b`. +/// Multiplies the packed 16-bit integers in `a` and `b`. /// /// The multiplication produces intermediate 32-bit integers, and returns the /// high 16 bits of the intermediate integers. @@ -255,7 +255,7 @@ pub unsafe fn _mm_mulhi_epi16(a: __m128i, b: __m128i) -> __m128i { mem::transmute(pmulhw(a.as_i16x8(), b.as_i16x8())) } -/// Multiply the packed unsigned 16-bit integers in `a` and `b`. +/// Multiplies the packed unsigned 16-bit integers in `a` and `b`. /// /// The multiplication produces intermediate 32-bit integers, and returns the /// high 16 bits of the intermediate integers. @@ -269,7 +269,7 @@ pub unsafe fn _mm_mulhi_epu16(a: __m128i, b: __m128i) -> __m128i { mem::transmute(pmulhuw(a.as_u16x8(), b.as_u16x8())) } -/// Multiply the packed 16-bit integers in `a` and `b`. +/// Multiplies the packed 16-bit integers in `a` and `b`. /// /// The multiplication produces intermediate 32-bit integers, and returns the /// low 16 bits of the intermediate integers. @@ -283,10 +283,10 @@ pub unsafe fn _mm_mullo_epi16(a: __m128i, b: __m128i) -> __m128i { mem::transmute(simd_mul(a.as_i16x8(), b.as_i16x8())) } -/// Multiply the low unsigned 32-bit integers from each packed 64-bit element +/// Multiplies the low unsigned 32-bit integers from each packed 64-bit element /// in `a` and `b`. /// -/// Return the unsigned 64-bit results. +/// Returns the unsigned 64-bit results. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_epu32) #[inline] @@ -299,7 +299,7 @@ pub unsafe fn _mm_mul_epu32(a: __m128i, b: __m128i) -> __m128i { /// Sum the absolute differences of packed unsigned 8-bit integers. /// -/// Compute the absolute differences of packed unsigned 8-bit integers in `a` +/// Computes the absolute differences of packed unsigned 8-bit integers in `a` /// and `b`, then horizontally sum each consecutive 8 differences to produce /// two unsigned 16-bit integers, and pack these unsigned 16-bit integers in /// the low 16 bits of 64-bit elements returned. @@ -313,7 +313,7 @@ pub unsafe fn _mm_sad_epu8(a: __m128i, b: __m128i) -> __m128i { mem::transmute(psadbw(a.as_u8x16(), b.as_u8x16())) } -/// Subtract packed 8-bit integers in `b` from packed 8-bit integers in `a`. +/// Subtracts packed 8-bit integers in `b` from packed 8-bit integers in `a`. 
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_epi8) #[inline] @@ -324,7 +324,7 @@ pub unsafe fn _mm_sub_epi8(a: __m128i, b: __m128i) -> __m128i { mem::transmute(simd_sub(a.as_i8x16(), b.as_i8x16())) } -/// Subtract packed 16-bit integers in `b` from packed 16-bit integers in `a`. +/// Subtracts packed 16-bit integers in `b` from packed 16-bit integers in `a`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_epi16) #[inline] @@ -405,7 +405,7 @@ pub unsafe fn _mm_subs_epu16(a: __m128i, b: __m128i) -> __m128i { mem::transmute(psubusw(a.as_u16x8(), b.as_u16x8())) } -/// Shift `a` left by `imm8` bytes while shifting in zeros. +/// Shifts `a` left by `imm8` bytes while shifting in zeros. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_slli_si128) #[inline] @@ -472,7 +472,7 @@ unsafe fn _mm_slli_si128_impl(a: __m128i, imm8: i32) -> __m128i { mem::transmute(x) } -/// Shift `a` left by `imm8` bytes while shifting in zeros. +/// Shifts `a` left by `imm8` bytes while shifting in zeros. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_bslli_si128) #[inline] @@ -484,7 +484,7 @@ pub unsafe fn _mm_bslli_si128(a: __m128i, imm8: i32) -> __m128i { _mm_slli_si128_impl(a, imm8) } -/// Shift `a` right by `imm8` bytes while shifting in zeros. +/// Shifts `a` right by `imm8` bytes while shifting in zeros. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_bsrli_si128) #[inline] @@ -496,7 +496,7 @@ pub unsafe fn _mm_bsrli_si128(a: __m128i, imm8: i32) -> __m128i { _mm_srli_si128_impl(a, imm8) } -/// Shift packed 16-bit integers in `a` left by `imm8` while shifting in zeros. +/// Shifts packed 16-bit integers in `a` left by `imm8` while shifting in zeros. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_slli_epi16) #[inline] @@ -508,7 +508,7 @@ pub unsafe fn _mm_slli_epi16(a: __m128i, imm8: i32) -> __m128i { mem::transmute(pslliw(a.as_i16x8(), imm8)) } -/// Shift packed 16-bit integers in `a` left by `count` while shifting in +/// Shifts packed 16-bit integers in `a` left by `count` while shifting in /// zeros. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sll_epi16) @@ -520,7 +520,7 @@ pub unsafe fn _mm_sll_epi16(a: __m128i, count: __m128i) -> __m128i { mem::transmute(psllw(a.as_i16x8(), count.as_i16x8())) } -/// Shift packed 32-bit integers in `a` left by `imm8` while shifting in zeros. +/// Shifts packed 32-bit integers in `a` left by `imm8` while shifting in zeros. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_slli_epi32) #[inline] @@ -532,7 +532,7 @@ pub unsafe fn _mm_slli_epi32(a: __m128i, imm8: i32) -> __m128i { mem::transmute(psllid(a.as_i32x4(), imm8)) } -/// Shift packed 32-bit integers in `a` left by `count` while shifting in +/// Shifts packed 32-bit integers in `a` left by `count` while shifting in /// zeros. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sll_epi32) @@ -544,7 +544,7 @@ pub unsafe fn _mm_sll_epi32(a: __m128i, count: __m128i) -> __m128i { mem::transmute(pslld(a.as_i32x4(), count.as_i32x4())) } -/// Shift packed 64-bit integers in `a` left by `imm8` while shifting in zeros. 
+/// Shifts packed 64-bit integers in `a` left by `imm8` while shifting in zeros. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_slli_epi64) #[inline] @@ -556,7 +556,7 @@ pub unsafe fn _mm_slli_epi64(a: __m128i, imm8: i32) -> __m128i { mem::transmute(pslliq(a.as_i64x2(), imm8)) } -/// Shift packed 64-bit integers in `a` left by `count` while shifting in +/// Shifts packed 64-bit integers in `a` left by `count` while shifting in /// zeros. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sll_epi64) @@ -568,7 +568,7 @@ pub unsafe fn _mm_sll_epi64(a: __m128i, count: __m128i) -> __m128i { mem::transmute(psllq(a.as_i64x2(), count.as_i64x2())) } -/// Shift packed 16-bit integers in `a` right by `imm8` while shifting in sign +/// Shifts packed 16-bit integers in `a` right by `imm8` while shifting in sign /// bits. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srai_epi16) @@ -581,7 +581,7 @@ pub unsafe fn _mm_srai_epi16(a: __m128i, imm8: i32) -> __m128i { mem::transmute(psraiw(a.as_i16x8(), imm8)) } -/// Shift packed 16-bit integers in `a` right by `count` while shifting in sign +/// Shifts packed 16-bit integers in `a` right by `count` while shifting in sign /// bits. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sra_epi16) @@ -593,7 +593,7 @@ pub unsafe fn _mm_sra_epi16(a: __m128i, count: __m128i) -> __m128i { mem::transmute(psraw(a.as_i16x8(), count.as_i16x8())) } -/// Shift packed 32-bit integers in `a` right by `imm8` while shifting in sign +/// Shifts packed 32-bit integers in `a` right by `imm8` while shifting in sign /// bits. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srai_epi32) @@ -606,7 +606,7 @@ pub unsafe fn _mm_srai_epi32(a: __m128i, imm8: i32) -> __m128i { mem::transmute(psraid(a.as_i32x4(), imm8)) } -/// Shift packed 32-bit integers in `a` right by `count` while shifting in sign +/// Shifts packed 32-bit integers in `a` right by `count` while shifting in sign /// bits. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sra_epi32) @@ -618,7 +618,7 @@ pub unsafe fn _mm_sra_epi32(a: __m128i, count: __m128i) -> __m128i { mem::transmute(psrad(a.as_i32x4(), count.as_i32x4())) } -/// Shift `a` right by `imm8` bytes while shifting in zeros. +/// Shifts `a` right by `imm8` bytes while shifting in zeros. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srli_si128) #[inline] @@ -685,7 +685,7 @@ unsafe fn _mm_srli_si128_impl(a: __m128i, imm8: i32) -> __m128i { mem::transmute(x) } -/// Shift packed 16-bit integers in `a` right by `imm8` while shifting in +/// Shifts packed 16-bit integers in `a` right by `imm8` while shifting in /// zeros. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srli_epi16) @@ -698,7 +698,7 @@ pub unsafe fn _mm_srli_epi16(a: __m128i, imm8: i32) -> __m128i { mem::transmute(psrliw(a.as_i16x8(), imm8)) } -/// Shift packed 16-bit integers in `a` right by `count` while shifting in +/// Shifts packed 16-bit integers in `a` right by `count` while shifting in /// zeros. 
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srl_epi16) @@ -710,7 +710,7 @@ pub unsafe fn _mm_srl_epi16(a: __m128i, count: __m128i) -> __m128i { mem::transmute(psrlw(a.as_i16x8(), count.as_i16x8())) } -/// Shift packed 32-bit integers in `a` right by `imm8` while shifting in +/// Shifts packed 32-bit integers in `a` right by `imm8` while shifting in /// zeros. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srli_epi32) @@ -723,7 +723,7 @@ pub unsafe fn _mm_srli_epi32(a: __m128i, imm8: i32) -> __m128i { mem::transmute(psrlid(a.as_i32x4(), imm8)) } -/// Shift packed 32-bit integers in `a` right by `count` while shifting in +/// Shifts packed 32-bit integers in `a` right by `count` while shifting in /// zeros. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srl_epi32) @@ -735,7 +735,7 @@ pub unsafe fn _mm_srl_epi32(a: __m128i, count: __m128i) -> __m128i { mem::transmute(psrld(a.as_i32x4(), count.as_i32x4())) } -/// Shift packed 64-bit integers in `a` right by `imm8` while shifting in +/// Shifts packed 64-bit integers in `a` right by `imm8` while shifting in /// zeros. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srli_epi64) @@ -748,7 +748,7 @@ pub unsafe fn _mm_srli_epi64(a: __m128i, imm8: i32) -> __m128i { mem::transmute(psrliq(a.as_i64x2(), imm8)) } -/// Shift packed 64-bit integers in `a` right by `count` while shifting in +/// Shifts packed 64-bit integers in `a` right by `count` while shifting in /// zeros. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srl_epi64) @@ -760,7 +760,7 @@ pub unsafe fn _mm_srl_epi64(a: __m128i, count: __m128i) -> __m128i { mem::transmute(psrlq(a.as_i64x2(), count.as_i64x2())) } -/// Compute the bitwise AND of 128 bits (representing integer data) in `a` and +/// Computes the bitwise AND of 128 bits (representing integer data) in `a` and /// `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_and_si128) @@ -772,7 +772,7 @@ pub unsafe fn _mm_and_si128(a: __m128i, b: __m128i) -> __m128i { simd_and(a, b) } -/// Compute the bitwise NOT of 128 bits (representing integer data) in `a` and +/// Computes the bitwise NOT of 128 bits (representing integer data) in `a` and /// then AND with `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_andnot_si128) @@ -784,7 +784,7 @@ pub unsafe fn _mm_andnot_si128(a: __m128i, b: __m128i) -> __m128i { simd_and(simd_xor(_mm_set1_epi8(-1), a), b) } -/// Compute the bitwise OR of 128 bits (representing integer data) in `a` and +/// Computes the bitwise OR of 128 bits (representing integer data) in `a` and /// `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_or_si128) @@ -796,7 +796,7 @@ pub unsafe fn _mm_or_si128(a: __m128i, b: __m128i) -> __m128i { simd_or(a, b) } -/// Compute the bitwise XOR of 128 bits (representing integer data) in `a` and +/// Computes the bitwise XOR of 128 bits (representing integer data) in `a` and /// `b`. 
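// Illustrative sketch (not part of the patch): the bitwise and shift-by-vector
// intrinsics documented above. `_mm_andnot_si128(a, b)` computes `!a & b`
// (note the operand order), and `_mm_sll_epi16` takes its shift count from the
// low 64 bits of a vector, e.g. one built with `_mm_cvtsi32_si128`. The helper
// name is made up; it assumes an x86_64 target with baseline SSE2.
#[cfg(target_arch = "x86_64")]
fn bitwise_shift_demo() {
    use core::arch::x86_64::*;
    unsafe {
        let a = _mm_set1_epi16(0b1100);
        let b = _mm_set1_epi16(0b1010);
        let mut out = [0i16; 8];

        // !0b1100 & 0b1010 == 0b0010 in every lane.
        _mm_storeu_si128(out.as_mut_ptr() as *mut __m128i, _mm_andnot_si128(a, b));
        assert!(out.iter().all(|&x| x == 0b0010));

        // Shift every 16-bit lane left by 3: 0b1100 -> 0b110_0000.
        let count = _mm_cvtsi32_si128(3);
        _mm_storeu_si128(out.as_mut_ptr() as *mut __m128i, _mm_sll_epi16(a, count));
        assert!(out.iter().all(|&x| x == 0b110_0000));
    }
}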
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_xor_si128) @@ -808,7 +808,7 @@ pub unsafe fn _mm_xor_si128(a: __m128i, b: __m128i) -> __m128i { simd_xor(a, b) } -/// Compare packed 8-bit integers in `a` and `b` for equality. +/// Compares packed 8-bit integers in `a` and `b` for equality. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpeq_epi8) #[inline] @@ -819,7 +819,7 @@ pub unsafe fn _mm_cmpeq_epi8(a: __m128i, b: __m128i) -> __m128i { mem::transmute::(simd_eq(a.as_i8x16(), b.as_i8x16())) } -/// Compare packed 16-bit integers in `a` and `b` for equality. +/// Compares packed 16-bit integers in `a` and `b` for equality. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpeq_epi16) #[inline] @@ -830,7 +830,7 @@ pub unsafe fn _mm_cmpeq_epi16(a: __m128i, b: __m128i) -> __m128i { mem::transmute::(simd_eq(a.as_i16x8(), b.as_i16x8())) } -/// Compare packed 32-bit integers in `a` and `b` for equality. +/// Compares packed 32-bit integers in `a` and `b` for equality. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpeq_epi32) #[inline] @@ -841,7 +841,7 @@ pub unsafe fn _mm_cmpeq_epi32(a: __m128i, b: __m128i) -> __m128i { mem::transmute::(simd_eq(a.as_i32x4(), b.as_i32x4())) } -/// Compare packed 8-bit integers in `a` and `b` for greater-than. +/// Compares packed 8-bit integers in `a` and `b` for greater-than. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpgt_epi8) #[inline] @@ -852,7 +852,7 @@ pub unsafe fn _mm_cmpgt_epi8(a: __m128i, b: __m128i) -> __m128i { mem::transmute::(simd_gt(a.as_i8x16(), b.as_i8x16())) } -/// Compare packed 16-bit integers in `a` and `b` for greater-than. +/// Compares packed 16-bit integers in `a` and `b` for greater-than. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpgt_epi16) #[inline] @@ -863,7 +863,7 @@ pub unsafe fn _mm_cmpgt_epi16(a: __m128i, b: __m128i) -> __m128i { mem::transmute::(simd_gt(a.as_i16x8(), b.as_i16x8())) } -/// Compare packed 32-bit integers in `a` and `b` for greater-than. +/// Compares packed 32-bit integers in `a` and `b` for greater-than. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpgt_epi32) #[inline] @@ -874,7 +874,7 @@ pub unsafe fn _mm_cmpgt_epi32(a: __m128i, b: __m128i) -> __m128i { mem::transmute::(simd_gt(a.as_i32x4(), b.as_i32x4())) } -/// Compare packed 8-bit integers in `a` and `b` for less-than. +/// Compares packed 8-bit integers in `a` and `b` for less-than. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmplt_epi8) #[inline] @@ -885,7 +885,7 @@ pub unsafe fn _mm_cmplt_epi8(a: __m128i, b: __m128i) -> __m128i { mem::transmute::(simd_lt(a.as_i8x16(), b.as_i8x16())) } -/// Compare packed 16-bit integers in `a` and `b` for less-than. +/// Compares packed 16-bit integers in `a` and `b` for less-than. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmplt_epi16) #[inline] @@ -896,7 +896,7 @@ pub unsafe fn _mm_cmplt_epi16(a: __m128i, b: __m128i) -> __m128i { mem::transmute::(simd_lt(a.as_i16x8(), b.as_i16x8())) } -/// Compare packed 32-bit integers in `a` and `b` for less-than. 
+/// Compares packed 32-bit integers in `a` and `b` for less-than. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmplt_epi32) #[inline] @@ -907,7 +907,7 @@ pub unsafe fn _mm_cmplt_epi32(a: __m128i, b: __m128i) -> __m128i { mem::transmute::(simd_lt(a.as_i32x4(), b.as_i32x4())) } -/// Convert the lower two packed 32-bit integers in `a` to packed +/// Converts the lower two packed 32-bit integers in `a` to packed /// double-precision (64-bit) floating-point elements. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi32_pd) @@ -920,7 +920,7 @@ pub unsafe fn _mm_cvtepi32_pd(a: __m128i) -> __m128d { simd_cast::(simd_shuffle2(a, a, [0, 1])) } -/// Return `a` with its lower element replaced by `b` after converting it to +/// Returns `a` with its lower element replaced by `b` after converting it to /// an `f64`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi32_sd) @@ -932,7 +932,7 @@ pub unsafe fn _mm_cvtsi32_sd(a: __m128d, b: i32) -> __m128d { simd_insert(a, 0, b as f64) } -/// Convert packed 32-bit integers in `a` to packed single-precision (32-bit) +/// Converts packed 32-bit integers in `a` to packed single-precision (32-bit) /// floating-point elements. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi32_ps) @@ -944,7 +944,7 @@ pub unsafe fn _mm_cvtepi32_ps(a: __m128i) -> __m128 { cvtdq2ps(a.as_i32x4()) } -/// Convert packed single-precision (32-bit) floating-point elements in `a` +/// Converts packed single-precision (32-bit) floating-point elements in `a` /// to packed 32-bit integers. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtps_epi32) @@ -956,7 +956,7 @@ pub unsafe fn _mm_cvtps_epi32(a: __m128) -> __m128i { mem::transmute(cvtps2dq(a)) } -/// Return a vector whose lowest element is `a` and all higher elements are +/// Returns a vector whose lowest element is `a` and all higher elements are /// `0`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi32_si128) @@ -968,7 +968,7 @@ pub unsafe fn _mm_cvtsi32_si128(a: i32) -> __m128i { mem::transmute(i32x4::new(a, 0, 0, 0)) } -/// Return the lowest element of `a`. +/// Returns the lowest element of `a`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi128_si32) #[inline] @@ -979,7 +979,7 @@ pub unsafe fn _mm_cvtsi128_si32(a: __m128i) -> i32 { simd_extract(a.as_i32x4(), 0) } -/// Set packed 64-bit integers with the supplied values, from highest to +/// Sets packed 64-bit integers with the supplied values, from highest to /// lowest. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_epi64x) @@ -991,7 +991,7 @@ pub unsafe fn _mm_set_epi64x(e1: i64, e0: i64) -> __m128i { mem::transmute(i64x2::new(e0, e1)) } -/// Set packed 32-bit integers with the supplied values. +/// Sets packed 32-bit integers with the supplied values. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_epi32) #[inline] @@ -1002,7 +1002,7 @@ pub unsafe fn _mm_set_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i { mem::transmute(i32x4::new(e0, e1, e2, e3)) } -/// Set packed 16-bit integers with the supplied values. 
+/// Sets packed 16-bit integers with the supplied values. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_epi16) #[inline] @@ -1022,7 +1022,7 @@ pub unsafe fn _mm_set_epi16( mem::transmute(i16x8::new(e0, e1, e2, e3, e4, e5, e6, e7)) } -/// Set packed 8-bit integers with the supplied values. +/// Sets packed 8-bit integers with the supplied values. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_epi8) #[inline] @@ -1053,7 +1053,7 @@ pub unsafe fn _mm_set_epi8( )) } -/// Broadcast 64-bit integer `a` to all elements. +/// Broadcasts 64-bit integer `a` to all elements. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set1_epi64x) #[inline] @@ -1064,7 +1064,7 @@ pub unsafe fn _mm_set1_epi64x(a: i64) -> __m128i { _mm_set_epi64x(a, a) } -/// Broadcast 32-bit integer `a` to all elements. +/// Broadcasts 32-bit integer `a` to all elements. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set1_epi32) #[inline] @@ -1075,7 +1075,7 @@ pub unsafe fn _mm_set1_epi32(a: i32) -> __m128i { _mm_set_epi32(a, a, a, a) } -/// Broadcast 16-bit integer `a` to all elements. +/// Broadcasts 16-bit integer `a` to all elements. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set1_epi16) #[inline] @@ -1086,7 +1086,7 @@ pub unsafe fn _mm_set1_epi16(a: i16) -> __m128i { _mm_set_epi16(a, a, a, a, a, a, a, a) } -/// Broadcast 8-bit integer `a` to all elements. +/// Broadcasts 8-bit integer `a` to all elements. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set1_epi8) #[inline] @@ -1097,7 +1097,7 @@ pub unsafe fn _mm_set1_epi8(a: i8) -> __m128i { _mm_set_epi8(a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a) } -/// Set packed 32-bit integers with the supplied values in reverse order. +/// Sets packed 32-bit integers with the supplied values in reverse order. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setr_epi32) #[inline] @@ -1108,7 +1108,7 @@ pub unsafe fn _mm_setr_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i { _mm_set_epi32(e0, e1, e2, e3) } -/// Set packed 16-bit integers with the supplied values in reverse order. +/// Sets packed 16-bit integers with the supplied values in reverse order. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setr_epi16) #[inline] @@ -1128,7 +1128,7 @@ pub unsafe fn _mm_setr_epi16( _mm_set_epi16(e0, e1, e2, e3, e4, e5, e6, e7) } -/// Set packed 8-bit integers with the supplied values in reverse order. +/// Sets packed 8-bit integers with the supplied values in reverse order. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setr_epi8) #[inline] @@ -1170,7 +1170,7 @@ pub unsafe fn _mm_setzero_si128() -> __m128i { _mm_set1_epi64x(0) } -/// Load 64-bit integer from memory into first element of returned vector. +/// Loads 64-bit integer from memory into first element of returned vector. 
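// Illustrative sketch (not part of the patch): argument order for the set
// intrinsics documented above. `_mm_set_epi32` takes arguments from the
// highest lane down to the lowest, while `_mm_setr_epi32` takes them in
// lowest-lane-first (memory) order. The helper name is made up; it assumes
// an x86_64 target with baseline SSE2.
#[cfg(target_arch = "x86_64")]
fn set_order_demo() {
    use core::arch::x86_64::*;
    unsafe {
        let hi_first = _mm_set_epi32(3, 2, 1, 0);  // lane 0 == 0, lane 3 == 3
        let lo_first = _mm_setr_epi32(0, 1, 2, 3); // same vector
        let mut a = [0i32; 4];
        let mut b = [0i32; 4];
        _mm_storeu_si128(a.as_mut_ptr() as *mut __m128i, hi_first);
        _mm_storeu_si128(b.as_mut_ptr() as *mut __m128i, lo_first);
        assert_eq!(a, [0, 1, 2, 3]);
        assert_eq!(a, b);
    }
}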
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadl_epi64) #[inline] @@ -1190,7 +1190,7 @@ pub unsafe fn _mm_loadl_epi64(mem_addr: *const __m128i) -> __m128i { _mm_set_epi64x(0, ptr::read_unaligned(mem_addr as *const i64)) } -/// Load 128-bits of integer data from memory into a new vector. +/// Loads 128-bits of integer data from memory into a new vector. /// /// `mem_addr` must be aligned on a 16-byte boundary. /// @@ -1203,7 +1203,7 @@ pub unsafe fn _mm_load_si128(mem_addr: *const __m128i) -> __m128i { *mem_addr } -/// Load 128-bits of integer data from memory into a new vector. +/// Loads 128-bits of integer data from memory into a new vector. /// /// `mem_addr` does not need to be aligned on any particular boundary. /// @@ -1240,7 +1240,7 @@ pub unsafe fn _mm_maskmoveu_si128(a: __m128i, mask: __m128i, mem_addr: *mut i8) maskmovdqu(a.as_i8x16(), mask.as_i8x16(), mem_addr) } -/// Store 128-bits of integer data from `a` into memory. +/// Stores 128-bits of integer data from `a` into memory. /// /// `mem_addr` must be aligned on a 16-byte boundary. /// @@ -1253,7 +1253,7 @@ pub unsafe fn _mm_store_si128(mem_addr: *mut __m128i, a: __m128i) { *mem_addr = a; } -/// Store 128-bits of integer data from `a` into memory. +/// Stores 128-bits of integer data from `a` into memory. /// /// `mem_addr` does not need to be aligned on any particular boundary. /// @@ -1266,7 +1266,7 @@ pub unsafe fn _mm_storeu_si128(mem_addr: *mut __m128i, a: __m128i) { storeudq(mem_addr as *mut i8, a); } -/// Store the lower 64-bit integer `a` to a memory location. +/// Stores the lower 64-bit integer `a` to a memory location. /// /// `mem_addr` does not need to be aligned on any particular boundary. /// @@ -1314,7 +1314,7 @@ pub unsafe fn _mm_stream_si32(mem_addr: *mut i32, a: i32) { ::intrinsics::nontemporal_store(mem_addr, a); } -/// Return a vector where the low element is extracted from `a` and its upper +/// Returns a vector where the low element is extracted from `a` and its upper /// element is zero. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_move_epi64) @@ -1329,7 +1329,7 @@ pub unsafe fn _mm_move_epi64(a: __m128i) -> __m128i { mem::transmute(r) } -/// Convert packed 16-bit integers from `a` and `b` to packed 8-bit integers +/// Converts packed 16-bit integers from `a` and `b` to packed 8-bit integers /// using signed saturation. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_packs_epi16) @@ -1341,7 +1341,7 @@ pub unsafe fn _mm_packs_epi16(a: __m128i, b: __m128i) -> __m128i { mem::transmute(packsswb(a.as_i16x8(), b.as_i16x8())) } -/// Convert packed 32-bit integers from `a` and `b` to packed 16-bit integers +/// Converts packed 32-bit integers from `a` and `b` to packed 16-bit integers /// using signed saturation. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_packs_epi32) @@ -1353,7 +1353,7 @@ pub unsafe fn _mm_packs_epi32(a: __m128i, b: __m128i) -> __m128i { mem::transmute(packssdw(a.as_i32x4(), b.as_i32x4())) } -/// Convert packed 16-bit integers from `a` and `b` to packed 8-bit integers +/// Converts packed 16-bit integers from `a` and `b` to packed 8-bit integers /// using unsigned saturation. 
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_packus_epi16) @@ -1365,7 +1365,7 @@ pub unsafe fn _mm_packus_epi16(a: __m128i, b: __m128i) -> __m128i { mem::transmute(packuswb(a.as_i16x8(), b.as_i16x8())) } -/// Return the `imm8` element of `a`. +/// Returns the `imm8` element of `a`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_extract_epi16) #[inline] @@ -1377,7 +1377,7 @@ pub unsafe fn _mm_extract_epi16(a: __m128i, imm8: i32) -> i32 { simd_extract::<_, i16>(a.as_i16x8(), (imm8 & 7) as u32) as i32 } -/// Return a new vector where the `imm8` element of `a` is replaced with `i`. +/// Returns a new vector where the `imm8` element of `a` is replaced with `i`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_insert_epi16) #[inline] @@ -1389,7 +1389,7 @@ pub unsafe fn _mm_insert_epi16(a: __m128i, i: i32, imm8: i32) -> __m128i { mem::transmute(simd_insert(a.as_i16x8(), (imm8 & 7) as u32, i as i16)) } -/// Return a mask of the most significant bit of each element in `a`. +/// Returns a mask of the most significant bit of each element in `a`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_movemask_epi8) #[inline] @@ -1400,7 +1400,7 @@ pub unsafe fn _mm_movemask_epi8(a: __m128i) -> i32 { pmovmskb(a.as_i8x16()) } -/// Shuffle 32-bit integers in `a` using the control in `imm8`. +/// Shuffles 32-bit integers in `a` using the control in `imm8`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shuffle_epi32) #[inline] @@ -1463,7 +1463,7 @@ pub unsafe fn _mm_shuffle_epi32(a: __m128i, imm8: i32) -> __m128i { mem::transmute(x) } -/// Shuffle 16-bit integers in the high 64 bits of `a` using the control in +/// Shuffles 16-bit integers in the high 64 bits of `a` using the control in /// `imm8`. /// /// Put the results in the high 64 bits of the returned vector, with the low 64 @@ -1523,7 +1523,7 @@ pub unsafe fn _mm_shufflehi_epi16(a: __m128i, imm8: i32) -> __m128i { mem::transmute(x) } -/// Shuffle 16-bit integers in the low 64 bits of `a` using the control in +/// Shuffles 16-bit integers in the low 64 bits of `a` using the control in /// `imm8`. /// /// Put the results in the low 64 bits of the returned vector, with the high 64 @@ -1584,7 +1584,7 @@ pub unsafe fn _mm_shufflelo_epi16(a: __m128i, imm8: i32) -> __m128i { mem::transmute(x) } -/// Unpack and interleave 8-bit integers from the high half of `a` and `b`. +/// Unpacks and interleave 8-bit integers from the high half of `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpackhi_epi8) #[inline] @@ -1599,7 +1599,7 @@ pub unsafe fn _mm_unpackhi_epi8(a: __m128i, b: __m128i) -> __m128i { )) } -/// Unpack and interleave 16-bit integers from the high half of `a` and `b`. +/// Unpacks and interleave 16-bit integers from the high half of `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpackhi_epi16) #[inline] @@ -1611,7 +1611,7 @@ pub unsafe fn _mm_unpackhi_epi16(a: __m128i, b: __m128i) -> __m128i { mem::transmute::(x) } -/// Unpack and interleave 32-bit integers from the high half of `a` and `b`. +/// Unpacks and interleave 32-bit integers from the high half of `a` and `b`. 
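// Illustrative sketch (not part of the patch): a common use of the comparison
// and `_mm_movemask_epi8` intrinsics documented above -- finding the first
// occurrence of a byte in a 16-byte chunk. The helper name is made up; it
// assumes an x86_64 target with baseline SSE2.
#[cfg(target_arch = "x86_64")]
fn find_byte(chunk: &[u8; 16], needle: u8) -> Option<usize> {
    use core::arch::x86_64::*;
    unsafe {
        let data = _mm_loadu_si128(chunk.as_ptr() as *const __m128i);
        // Each equal byte becomes 0xFF; movemask packs the 16 sign bits into
        // the low bits of an i32.
        let eq = _mm_cmpeq_epi8(data, _mm_set1_epi8(needle as i8));
        let mask = _mm_movemask_epi8(eq) as u32;
        if mask == 0 { None } else { Some(mask.trailing_zeros() as usize) }
    }
}
// e.g. find_byte(b"abcdefghijklmnop", b'h') == Some(7)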
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpackhi_epi32) #[inline] @@ -1622,7 +1622,7 @@ pub unsafe fn _mm_unpackhi_epi32(a: __m128i, b: __m128i) -> __m128i { mem::transmute::(simd_shuffle4(a.as_i32x4(), b.as_i32x4(), [2, 6, 3, 7])) } -/// Unpack and interleave 64-bit integers from the high half of `a` and `b`. +/// Unpacks and interleave 64-bit integers from the high half of `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpackhi_epi64) #[inline] @@ -1633,7 +1633,7 @@ pub unsafe fn _mm_unpackhi_epi64(a: __m128i, b: __m128i) -> __m128i { mem::transmute::(simd_shuffle2(a.as_i64x2(), b.as_i64x2(), [1, 3])) } -/// Unpack and interleave 8-bit integers from the low half of `a` and `b`. +/// Unpacks and interleave 8-bit integers from the low half of `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpacklo_epi8) #[inline] @@ -1648,7 +1648,7 @@ pub unsafe fn _mm_unpacklo_epi8(a: __m128i, b: __m128i) -> __m128i { )) } -/// Unpack and interleave 16-bit integers from the low half of `a` and `b`. +/// Unpacks and interleave 16-bit integers from the low half of `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpacklo_epi16) #[inline] @@ -1660,7 +1660,7 @@ pub unsafe fn _mm_unpacklo_epi16(a: __m128i, b: __m128i) -> __m128i { mem::transmute::(x) } -/// Unpack and interleave 32-bit integers from the low half of `a` and `b`. +/// Unpacks and interleave 32-bit integers from the low half of `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpacklo_epi32) #[inline] @@ -1671,7 +1671,7 @@ pub unsafe fn _mm_unpacklo_epi32(a: __m128i, b: __m128i) -> __m128i { mem::transmute::(simd_shuffle4(a.as_i32x4(), b.as_i32x4(), [0, 4, 1, 5])) } -/// Unpack and interleave 64-bit integers from the low half of `a` and `b`. +/// Unpacks and interleave 64-bit integers from the low half of `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpacklo_epi64) #[inline] @@ -1682,7 +1682,7 @@ pub unsafe fn _mm_unpacklo_epi64(a: __m128i, b: __m128i) -> __m128i { mem::transmute::(simd_shuffle2(a.as_i64x2(), b.as_i64x2(), [0, 2])) } -/// Return a new vector with the low element of `a` replaced by the sum of the +/// Returns a new vector with the low element of `a` replaced by the sum of the /// low elements of `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_sd) @@ -1694,7 +1694,7 @@ pub unsafe fn _mm_add_sd(a: __m128d, b: __m128d) -> __m128d { simd_insert(a, 0, _mm_cvtsd_f64(a) + _mm_cvtsd_f64(b)) } -/// Add packed double-precision (64-bit) floating-point elements in `a` and +/// Adds packed double-precision (64-bit) floating-point elements in `a` and /// `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_pd) @@ -1706,7 +1706,7 @@ pub unsafe fn _mm_add_pd(a: __m128d, b: __m128d) -> __m128d { simd_add(a, b) } -/// Return a new vector with the low element of `a` replaced by the result of +/// Returns a new vector with the low element of `a` replaced by the result of /// diving the lower element of `a` by the lower element of `b`. 
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_div_sd) @@ -1730,7 +1730,7 @@ pub unsafe fn _mm_div_pd(a: __m128d, b: __m128d) -> __m128d { simd_div(a, b) } -/// Return a new vector with the low element of `a` replaced by the maximum +/// Returns a new vector with the low element of `a` replaced by the maximum /// of the lower elements of `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_sd) @@ -1742,7 +1742,7 @@ pub unsafe fn _mm_max_sd(a: __m128d, b: __m128d) -> __m128d { maxsd(a, b) } -/// Return a new vector with the maximum values from corresponding elements in +/// Returns a new vector with the maximum values from corresponding elements in /// `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_pd) @@ -1754,7 +1754,7 @@ pub unsafe fn _mm_max_pd(a: __m128d, b: __m128d) -> __m128d { maxpd(a, b) } -/// Return a new vector with the low element of `a` replaced by the minimum +/// Returns a new vector with the low element of `a` replaced by the minimum /// of the lower elements of `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_sd) @@ -1766,7 +1766,7 @@ pub unsafe fn _mm_min_sd(a: __m128d, b: __m128d) -> __m128d { minsd(a, b) } -/// Return a new vector with the minimum values from corresponding elements in +/// Returns a new vector with the minimum values from corresponding elements in /// `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_pd) @@ -1778,7 +1778,7 @@ pub unsafe fn _mm_min_pd(a: __m128d, b: __m128d) -> __m128d { minpd(a, b) } -/// Return a new vector with the low element of `a` replaced by multiplying the +/// Returns a new vector with the low element of `a` replaced by multiplying the /// low elements of `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_sd) @@ -1790,7 +1790,7 @@ pub unsafe fn _mm_mul_sd(a: __m128d, b: __m128d) -> __m128d { simd_insert(a, 0, _mm_cvtsd_f64(a) * _mm_cvtsd_f64(b)) } -/// Multiply packed double-precision (64-bit) floating-point elements in `a` +/// Multiplies packed double-precision (64-bit) floating-point elements in `a` /// and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_pd) @@ -1802,7 +1802,7 @@ pub unsafe fn _mm_mul_pd(a: __m128d, b: __m128d) -> __m128d { simd_mul(a, b) } -/// Return a new vector with the low element of `a` replaced by the square +/// Returns a new vector with the low element of `a` replaced by the square /// root of the lower element `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sqrt_sd) @@ -1814,7 +1814,7 @@ pub unsafe fn _mm_sqrt_sd(a: __m128d, b: __m128d) -> __m128d { simd_insert(a, 0, _mm_cvtsd_f64(sqrtsd(b))) } -/// Return a new vector with the square root of each of the values in `a`. +/// Returns a new vector with the square root of each of the values in `a`. 
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sqrt_pd) #[inline] @@ -1825,7 +1825,7 @@ pub unsafe fn _mm_sqrt_pd(a: __m128d) -> __m128d { sqrtpd(a) } -/// Return a new vector with the low element of `a` replaced by subtracting the +/// Returns a new vector with the low element of `a` replaced by subtracting the /// low element by `b` from the low element of `a`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_sd) @@ -1849,7 +1849,7 @@ pub unsafe fn _mm_sub_pd(a: __m128d, b: __m128d) -> __m128d { simd_sub(a, b) } -/// Compute the bitwise AND of packed double-precision (64-bit) floating-point +/// Computes the bitwise AND of packed double-precision (64-bit) floating-point /// elements in `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_and_pd) @@ -1863,7 +1863,7 @@ pub unsafe fn _mm_and_pd(a: __m128d, b: __m128d) -> __m128d { mem::transmute(_mm_and_si128(a, b)) } -/// Compute the bitwise NOT of `a` and then AND with `b`. +/// Computes the bitwise NOT of `a` and then AND with `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_andnot_pd) #[inline] @@ -1876,7 +1876,7 @@ pub unsafe fn _mm_andnot_pd(a: __m128d, b: __m128d) -> __m128d { mem::transmute(_mm_andnot_si128(a, b)) } -/// Compute the bitwise OR of `a` and `b`. +/// Computes the bitwise OR of `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_or_pd) #[inline] @@ -1889,7 +1889,7 @@ pub unsafe fn _mm_or_pd(a: __m128d, b: __m128d) -> __m128d { mem::transmute(_mm_or_si128(a, b)) } -/// Compute the bitwise OR of `a` and `b`. +/// Computes the bitwise OR of `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_xor_pd) #[inline] @@ -1902,7 +1902,7 @@ pub unsafe fn _mm_xor_pd(a: __m128d, b: __m128d) -> __m128d { mem::transmute(_mm_xor_si128(a, b)) } -/// Return a new vector with the low element of `a` replaced by the equality +/// Returns a new vector with the low element of `a` replaced by the equality /// comparison of the lower elements of `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpeq_sd) @@ -1914,7 +1914,7 @@ pub unsafe fn _mm_cmpeq_sd(a: __m128d, b: __m128d) -> __m128d { cmpsd(a, b, 0) } -/// Return a new vector with the low element of `a` replaced by the less-than +/// Returns a new vector with the low element of `a` replaced by the less-than /// comparison of the lower elements of `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmplt_sd) @@ -1926,7 +1926,7 @@ pub unsafe fn _mm_cmplt_sd(a: __m128d, b: __m128d) -> __m128d { cmpsd(a, b, 1) } -/// Return a new vector with the low element of `a` replaced by the +/// Returns a new vector with the low element of `a` replaced by the /// less-than-or-equal comparison of the lower elements of `a` and `b`. 
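// Illustrative sketch (not part of the patch): the `_sd` forms documented
// above operate only on the low f64 lane and pass the upper lane of `a`
// through unchanged, unlike the `_pd` forms, which work on both lanes. The
// helper name is made up; it assumes an x86_64 target with baseline SSE2.
#[cfg(target_arch = "x86_64")]
fn scalar_vs_packed_demo() {
    use core::arch::x86_64::*;
    unsafe {
        let a = _mm_set_pd(10.0, 4.0); // lanes (low, high): [4.0, 10.0]
        let b = _mm_set_pd(99.0, 9.0);
        let s = _mm_sqrt_sd(a, b);     // low = sqrt(b.low) = 3.0, high = a.high = 10.0
        assert_eq!(_mm_cvtsd_f64(s), 3.0);
        let p = _mm_add_pd(a, b);      // both lanes: [13.0, 109.0]
        assert_eq!(_mm_cvtsd_f64(p), 13.0);
    }
}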
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmple_sd) @@ -1938,7 +1938,7 @@ pub unsafe fn _mm_cmple_sd(a: __m128d, b: __m128d) -> __m128d { cmpsd(a, b, 2) } -/// Return a new vector with the low element of `a` replaced by the +/// Returns a new vector with the low element of `a` replaced by the /// greater-than comparison of the lower elements of `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpgt_sd) @@ -1950,7 +1950,7 @@ pub unsafe fn _mm_cmpgt_sd(a: __m128d, b: __m128d) -> __m128d { simd_insert(_mm_cmplt_sd(b, a), 1, simd_extract::<_, f64>(a, 1)) } -/// Return a new vector with the low element of `a` replaced by the +/// Returns a new vector with the low element of `a` replaced by the /// greater-than-or-equal comparison of the lower elements of `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpge_sd) @@ -1962,7 +1962,7 @@ pub unsafe fn _mm_cmpge_sd(a: __m128d, b: __m128d) -> __m128d { simd_insert(_mm_cmple_sd(b, a), 1, simd_extract::<_, f64>(a, 1)) } -/// Return a new vector with the low element of `a` replaced by the result +/// Returns a new vector with the low element of `a` replaced by the result /// of comparing both of the lower elements of `a` and `b` to `NaN`. If /// neither are equal to `NaN` then `0xFFFFFFFFFFFFFFFF` is used and `0` /// otherwise. @@ -1976,7 +1976,7 @@ pub unsafe fn _mm_cmpord_sd(a: __m128d, b: __m128d) -> __m128d { cmpsd(a, b, 7) } -/// Return a new vector with the low element of `a` replaced by the result of +/// Returns a new vector with the low element of `a` replaced by the result of /// comparing both of the lower elements of `a` and `b` to `NaN`. If either is /// equal to `NaN` then `0xFFFFFFFFFFFFFFFF` is used and `0` otherwise. /// @@ -1989,7 +1989,7 @@ pub unsafe fn _mm_cmpunord_sd(a: __m128d, b: __m128d) -> __m128d { cmpsd(a, b, 3) } -/// Return a new vector with the low element of `a` replaced by the not-equal +/// Returns a new vector with the low element of `a` replaced by the not-equal /// comparison of the lower elements of `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpneq_sd) @@ -2001,7 +2001,7 @@ pub unsafe fn _mm_cmpneq_sd(a: __m128d, b: __m128d) -> __m128d { cmpsd(a, b, 4) } -/// Return a new vector with the low element of `a` replaced by the +/// Returns a new vector with the low element of `a` replaced by the /// not-less-than comparison of the lower elements of `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnlt_sd) @@ -2013,7 +2013,7 @@ pub unsafe fn _mm_cmpnlt_sd(a: __m128d, b: __m128d) -> __m128d { cmpsd(a, b, 5) } -/// Return a new vector with the low element of `a` replaced by the +/// Returns a new vector with the low element of `a` replaced by the /// not-less-than-or-equal comparison of the lower elements of `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnle_sd) @@ -2025,7 +2025,7 @@ pub unsafe fn _mm_cmpnle_sd(a: __m128d, b: __m128d) -> __m128d { cmpsd(a, b, 6) } -/// Return a new vector with the low element of `a` replaced by the +/// Returns a new vector with the low element of `a` replaced by the /// not-greater-than comparison of the lower elements of `a` and `b`. 
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpngt_sd) @@ -2037,7 +2037,7 @@ pub unsafe fn _mm_cmpngt_sd(a: __m128d, b: __m128d) -> __m128d { simd_insert(_mm_cmpnlt_sd(b, a), 1, simd_extract::<_, f64>(a, 1)) } -/// Return a new vector with the low element of `a` replaced by the +/// Returns a new vector with the low element of `a` replaced by the /// not-greater-than-or-equal comparison of the lower elements of `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnge_sd) @@ -2049,7 +2049,7 @@ pub unsafe fn _mm_cmpnge_sd(a: __m128d, b: __m128d) -> __m128d { simd_insert(_mm_cmpnle_sd(b, a), 1, simd_extract::<_, f64>(a, 1)) } -/// Compare corresponding elements in `a` and `b` for equality. +/// Compares corresponding elements in `a` and `b` for equality. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpeq_pd) #[inline] @@ -2060,7 +2060,7 @@ pub unsafe fn _mm_cmpeq_pd(a: __m128d, b: __m128d) -> __m128d { cmppd(a, b, 0) } -/// Compare corresponding elements in `a` and `b` for less-than. +/// Compares corresponding elements in `a` and `b` for less-than. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmplt_pd) #[inline] @@ -2071,7 +2071,7 @@ pub unsafe fn _mm_cmplt_pd(a: __m128d, b: __m128d) -> __m128d { cmppd(a, b, 1) } -/// Compare corresponding elements in `a` and `b` for less-than-or-equal +/// Compares corresponding elements in `a` and `b` for less-than-or-equal /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmple_pd) #[inline] @@ -2082,7 +2082,7 @@ pub unsafe fn _mm_cmple_pd(a: __m128d, b: __m128d) -> __m128d { cmppd(a, b, 2) } -/// Compare corresponding elements in `a` and `b` for greater-than. +/// Compares corresponding elements in `a` and `b` for greater-than. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpgt_pd) #[inline] @@ -2093,7 +2093,7 @@ pub unsafe fn _mm_cmpgt_pd(a: __m128d, b: __m128d) -> __m128d { _mm_cmplt_pd(b, a) } -/// Compare corresponding elements in `a` and `b` for greater-than-or-equal. +/// Compares corresponding elements in `a` and `b` for greater-than-or-equal. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpge_pd) #[inline] @@ -2104,7 +2104,7 @@ pub unsafe fn _mm_cmpge_pd(a: __m128d, b: __m128d) -> __m128d { _mm_cmple_pd(b, a) } -/// Compare corresponding elements in `a` and `b` to see if neither is `NaN`. +/// Compares corresponding elements in `a` and `b` to see if neither is `NaN`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpord_pd) #[inline] @@ -2115,7 +2115,7 @@ pub unsafe fn _mm_cmpord_pd(a: __m128d, b: __m128d) -> __m128d { cmppd(a, b, 7) } -/// Compare corresponding elements in `a` and `b` to see if either is `NaN`. +/// Compares corresponding elements in `a` and `b` to see if either is `NaN`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpunord_pd) #[inline] @@ -2126,7 +2126,7 @@ pub unsafe fn _mm_cmpunord_pd(a: __m128d, b: __m128d) -> __m128d { cmppd(a, b, 3) } -/// Compare corresponding elements in `a` and `b` for not-equal. +/// Compares corresponding elements in `a` and `b` for not-equal. 
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpneq_pd) #[inline] @@ -2137,7 +2137,7 @@ pub unsafe fn _mm_cmpneq_pd(a: __m128d, b: __m128d) -> __m128d { cmppd(a, b, 4) } -/// Compare corresponding elements in `a` and `b` for not-less-than. +/// Compares corresponding elements in `a` and `b` for not-less-than. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnlt_pd) #[inline] @@ -2148,7 +2148,7 @@ pub unsafe fn _mm_cmpnlt_pd(a: __m128d, b: __m128d) -> __m128d { cmppd(a, b, 5) } -/// Compare corresponding elements in `a` and `b` for not-less-than-or-equal. +/// Compares corresponding elements in `a` and `b` for not-less-than-or-equal. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnle_pd) #[inline] @@ -2159,7 +2159,7 @@ pub unsafe fn _mm_cmpnle_pd(a: __m128d, b: __m128d) -> __m128d { cmppd(a, b, 6) } -/// Compare corresponding elements in `a` and `b` for not-greater-than. +/// Compares corresponding elements in `a` and `b` for not-greater-than. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpngt_pd) #[inline] @@ -2170,7 +2170,7 @@ pub unsafe fn _mm_cmpngt_pd(a: __m128d, b: __m128d) -> __m128d { _mm_cmpnlt_pd(b, a) } -/// Compare corresponding elements in `a` and `b` for +/// Compares corresponding elements in `a` and `b` for /// not-greater-than-or-equal. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnge_pd) @@ -2182,7 +2182,7 @@ pub unsafe fn _mm_cmpnge_pd(a: __m128d, b: __m128d) -> __m128d { _mm_cmpnle_pd(b, a) } -/// Compare the lower element of `a` and `b` for equality. +/// Compares the lower element of `a` and `b` for equality. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comieq_sd) #[inline] @@ -2193,7 +2193,7 @@ pub unsafe fn _mm_comieq_sd(a: __m128d, b: __m128d) -> i32 { comieqsd(a, b) } -/// Compare the lower element of `a` and `b` for less-than. +/// Compares the lower element of `a` and `b` for less-than. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comilt_sd) #[inline] @@ -2204,7 +2204,7 @@ pub unsafe fn _mm_comilt_sd(a: __m128d, b: __m128d) -> i32 { comiltsd(a, b) } -/// Compare the lower element of `a` and `b` for less-than-or-equal. +/// Compares the lower element of `a` and `b` for less-than-or-equal. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comile_sd) #[inline] @@ -2215,7 +2215,7 @@ pub unsafe fn _mm_comile_sd(a: __m128d, b: __m128d) -> i32 { comilesd(a, b) } -/// Compare the lower element of `a` and `b` for greater-than. +/// Compares the lower element of `a` and `b` for greater-than. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comigt_sd) #[inline] @@ -2226,7 +2226,7 @@ pub unsafe fn _mm_comigt_sd(a: __m128d, b: __m128d) -> i32 { comigtsd(a, b) } -/// Compare the lower element of `a` and `b` for greater-than-or-equal. +/// Compares the lower element of `a` and `b` for greater-than-or-equal. 
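Again not from the patch: a small sketch of the scalar `_mm_comi*_sd` comparisons above, which inspect only the low lane and return `0` or `1` instead of a mask (`scalar_compare_demo` is a made-up name).

```
#[cfg(target_arch = "x86_64")]
fn scalar_compare_demo() {
    use std::arch::x86_64::*;
    unsafe {
        let a = _mm_set_sd(1.5); // low lane = 1.5, high lane = 0.0
        let b = _mm_set_sd(2.5);
        assert_eq!(_mm_comilt_sd(a, b), 1); // 1.5 < 2.5
        assert_eq!(_mm_comigt_sd(a, b), 0);
    }
}
```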
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comige_sd) #[inline] @@ -2237,7 +2237,7 @@ pub unsafe fn _mm_comige_sd(a: __m128d, b: __m128d) -> i32 { comigesd(a, b) } -/// Compare the lower element of `a` and `b` for not-equal. +/// Compares the lower element of `a` and `b` for not-equal. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comineq_sd) #[inline] @@ -2248,7 +2248,7 @@ pub unsafe fn _mm_comineq_sd(a: __m128d, b: __m128d) -> i32 { comineqsd(a, b) } -/// Compare the lower element of `a` and `b` for equality. +/// Compares the lower element of `a` and `b` for equality. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomieq_sd) #[inline] @@ -2259,7 +2259,7 @@ pub unsafe fn _mm_ucomieq_sd(a: __m128d, b: __m128d) -> i32 { ucomieqsd(a, b) } -/// Compare the lower element of `a` and `b` for less-than. +/// Compares the lower element of `a` and `b` for less-than. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomilt_sd) #[inline] @@ -2270,7 +2270,7 @@ pub unsafe fn _mm_ucomilt_sd(a: __m128d, b: __m128d) -> i32 { ucomiltsd(a, b) } -/// Compare the lower element of `a` and `b` for less-than-or-equal. +/// Compares the lower element of `a` and `b` for less-than-or-equal. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomile_sd) #[inline] @@ -2281,7 +2281,7 @@ pub unsafe fn _mm_ucomile_sd(a: __m128d, b: __m128d) -> i32 { ucomilesd(a, b) } -/// Compare the lower element of `a` and `b` for greater-than. +/// Compares the lower element of `a` and `b` for greater-than. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomigt_sd) #[inline] @@ -2292,7 +2292,7 @@ pub unsafe fn _mm_ucomigt_sd(a: __m128d, b: __m128d) -> i32 { ucomigtsd(a, b) } -/// Compare the lower element of `a` and `b` for greater-than-or-equal. +/// Compares the lower element of `a` and `b` for greater-than-or-equal. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomige_sd) #[inline] @@ -2303,7 +2303,7 @@ pub unsafe fn _mm_ucomige_sd(a: __m128d, b: __m128d) -> i32 { ucomigesd(a, b) } -/// Compare the lower element of `a` and `b` for not-equal. +/// Compares the lower element of `a` and `b` for not-equal. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomineq_sd) #[inline] @@ -2314,7 +2314,7 @@ pub unsafe fn _mm_ucomineq_sd(a: __m128d, b: __m128d) -> i32 { ucomineqsd(a, b) } -/// Convert packed double-precision (64-bit) floating-point elements in "a" to +/// Converts packed double-precision (64-bit) floating-point elements in "a" to /// packed single-precision (32-bit) floating-point elements /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtpd_ps) @@ -2326,7 +2326,7 @@ pub unsafe fn _mm_cvtpd_ps(a: __m128d) -> __m128 { cvtpd2ps(a) } -/// Convert packed single-precision (32-bit) floating-point elements in `a` to +/// Converts packed single-precision (32-bit) floating-point elements in `a` to /// packed /// double-precision (64-bit) floating-point elements. 
/// @@ -2339,7 +2339,7 @@ pub unsafe fn _mm_cvtps_pd(a: __m128) -> __m128d { cvtps2pd(a) } -/// Convert packed double-precision (64-bit) floating-point elements in `a` to +/// Converts packed double-precision (64-bit) floating-point elements in `a` to /// packed 32-bit integers. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtpd_epi32) @@ -2351,7 +2351,7 @@ pub unsafe fn _mm_cvtpd_epi32(a: __m128d) -> __m128i { mem::transmute(cvtpd2dq(a)) } -/// Convert the lower double-precision (64-bit) floating-point element in a to +/// Converts the lower double-precision (64-bit) floating-point element in a to /// a 32-bit integer. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsd_si32) @@ -2363,9 +2363,9 @@ pub unsafe fn _mm_cvtsd_si32(a: __m128d) -> i32 { cvtsd2si(a) } -/// Convert the lower double-precision (64-bit) floating-point element in `b` +/// Converts the lower double-precision (64-bit) floating-point element in `b` /// to a single-precision (32-bit) floating-point element, store the result in -/// the lower element of the return value, and copy the upper element from `a` +/// the lower element of the return value, and copies the upper element from `a` /// to the upper element the return value. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsd_ss) @@ -2377,7 +2377,7 @@ pub unsafe fn _mm_cvtsd_ss(a: __m128, b: __m128d) -> __m128 { cvtsd2ss(a, b) } -/// Return the lower double-precision (64-bit) floating-point element of "a". +/// Returns the lower double-precision (64-bit) floating-point element of "a". /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsd_f64) #[inline] @@ -2387,9 +2387,9 @@ pub unsafe fn _mm_cvtsd_f64(a: __m128d) -> f64 { simd_extract(a, 0) } -/// Convert the lower single-precision (32-bit) floating-point element in `b` +/// Converts the lower single-precision (32-bit) floating-point element in `b` /// to a double-precision (64-bit) floating-point element, store the result in -/// the lower element of the return value, and copy the upper element from `a` +/// the lower element of the return value, and copies the upper element from `a` /// to the upper element the return value. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtss_sd) @@ -2401,7 +2401,7 @@ pub unsafe fn _mm_cvtss_sd(a: __m128d, b: __m128) -> __m128d { cvtss2sd(a, b) } -/// Convert packed double-precision (64-bit) floating-point elements in `a` to +/// Converts packed double-precision (64-bit) floating-point elements in `a` to /// packed 32-bit integers with truncation. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttpd_epi32) @@ -2413,7 +2413,7 @@ pub unsafe fn _mm_cvttpd_epi32(a: __m128d) -> __m128i { mem::transmute(cvttpd2dq(a)) } -/// Convert the lower double-precision (64-bit) floating-point element in `a` +/// Converts the lower double-precision (64-bit) floating-point element in `a` /// to a 32-bit integer with truncation. 
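A hedged sketch (not in the diff) contrasting the rounding and the truncating double-to-integer conversions described above; `convert_demo` is an illustrative name.

```
#[cfg(target_arch = "x86_64")]
fn convert_demo() {
    use std::arch::x86_64::*;
    unsafe {
        let v = _mm_set_sd(2.9);
        // `_mm_cvtsd_si32` rounds using the current rounding mode
        // (round-to-nearest by default); `_mm_cvttsd_si32` always truncates.
        assert_eq!(_mm_cvtsd_si32(v), 3);
        assert_eq!(_mm_cvttsd_si32(v), 2);
    }
}
```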
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsd_si32) @@ -2425,7 +2425,7 @@ pub unsafe fn _mm_cvttsd_si32(a: __m128d) -> i32 { cvttsd2si(a) } -/// Convert packed single-precision (32-bit) floating-point elements in `a` to +/// Converts packed single-precision (32-bit) floating-point elements in `a` to /// packed 32-bit integers with truncation. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttps_epi32) @@ -2437,7 +2437,7 @@ pub unsafe fn _mm_cvttps_epi32(a: __m128) -> __m128i { mem::transmute(cvttps2dq(a)) } -/// Copy double-precision (64-bit) floating-point element `a` to the lower +/// Copies double-precision (64-bit) floating-point element `a` to the lower /// element of the packed 64-bit return value. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_sd) @@ -2448,7 +2448,7 @@ pub unsafe fn _mm_set_sd(a: f64) -> __m128d { _mm_set_pd(0.0, a) } -/// Broadcast double-precision (64-bit) floating-point value a to all elements +/// Broadcasts double-precision (64-bit) floating-point value a to all elements /// of the return value. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set1_pd) @@ -2459,7 +2459,7 @@ pub unsafe fn _mm_set1_pd(a: f64) -> __m128d { _mm_set_pd(a, a) } -/// Broadcast double-precision (64-bit) floating-point value a to all elements +/// Broadcasts double-precision (64-bit) floating-point value a to all elements /// of the return value. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_pd1) @@ -2470,7 +2470,7 @@ pub unsafe fn _mm_set_pd1(a: f64) -> __m128d { _mm_set_pd(a, a) } -/// Set packed double-precision (64-bit) floating-point elements in the return +/// Sets packed double-precision (64-bit) floating-point elements in the return /// value with the supplied values. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_pd) @@ -2481,7 +2481,7 @@ pub unsafe fn _mm_set_pd(a: f64, b: f64) -> __m128d { __m128d(b, a) } -/// Set packed double-precision (64-bit) floating-point elements in the return +/// Sets packed double-precision (64-bit) floating-point elements in the return /// value with the supplied values in reverse order. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setr_pd) @@ -2504,7 +2504,7 @@ pub unsafe fn _mm_setzero_pd() -> __m128d { _mm_set_pd(0.0, 0.0) } -/// Return a mask of the most significant bit of each element in `a`. +/// Returns a mask of the most significant bit of each element in `a`. /// /// The mask is stored in the 2 least significant bits of the return value. /// All other bits are set to `0`. @@ -2518,7 +2518,7 @@ pub unsafe fn _mm_movemask_pd(a: __m128d) -> i32 { movmskpd(a) } -/// Load 128-bits (composed of 2 packed double-precision (64-bit) +/// Loads 128-bits (composed of 2 packed double-precision (64-bit) /// floating-point elements) from memory into the returned vector. /// `mem_addr` must be aligned on a 16-byte boundary or a general-protection /// exception may be generated. 
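Not part of the patch: a sketch of the argument order implied by the constructors above, where `_mm_set_pd` takes its arguments high-lane first and `_mm_setr_pd` takes them in memory order.

```
#[cfg(target_arch = "x86_64")]
fn constructor_order_demo() {
    use std::arch::x86_64::*;
    unsafe {
        let a = _mm_set_pd(2.0, 1.0);  // lane 0 = 1.0, lane 1 = 2.0
        let b = _mm_setr_pd(1.0, 2.0); // same layout, reversed argument order
        assert_eq!(_mm_movemask_pd(_mm_cmpeq_pd(a, b)), 0b11); // both lanes equal
        // The low lane of a broadcast is the broadcast value itself.
        assert_eq!(_mm_cvtsd_f64(_mm_set1_pd(7.0)), 7.0);
    }
}
```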
@@ -2598,7 +2598,7 @@ pub unsafe fn _mm_store_sd(mem_addr: *mut f64, a: __m128d) { *mem_addr = simd_extract(a, 0) } -/// Store 128-bits (composed of 2 packed double-precision (64-bit) +/// Stores 128-bits (composed of 2 packed double-precision (64-bit) /// floating-point elements) from `a` into memory. `mem_addr` must be aligned /// on a 16-byte boundary or a general-protection exception may be generated. /// @@ -2612,7 +2612,7 @@ pub unsafe fn _mm_store_pd(mem_addr: *mut f64, a: __m128d) { *(mem_addr as *mut __m128d) = a; } -/// Store 128-bits (composed of 2 packed double-precision (64-bit) +/// Stores 128-bits (composed of 2 packed double-precision (64-bit) /// floating-point elements) from `a` into memory. /// `mem_addr` does not need to be aligned on any particular boundary. /// @@ -2625,7 +2625,7 @@ pub unsafe fn _mm_storeu_pd(mem_addr: *mut f64, a: __m128d) { storeupd(mem_addr as *mut i8, a); } -/// Store the lower double-precision (64-bit) floating-point element from `a` +/// Stores the lower double-precision (64-bit) floating-point element from `a` /// into 2 contiguous elements in memory. `mem_addr` must be aligned on a /// 16-byte boundary or a general-protection exception may be generated. /// @@ -2639,7 +2639,7 @@ pub unsafe fn _mm_store1_pd(mem_addr: *mut f64, a: __m128d) { *(mem_addr as *mut __m128d) = b; } -/// Store the lower double-precision (64-bit) floating-point element from `a` +/// Stores the lower double-precision (64-bit) floating-point element from `a` /// into 2 contiguous elements in memory. `mem_addr` must be aligned on a /// 16-byte boundary or a general-protection exception may be generated. /// @@ -2653,7 +2653,7 @@ pub unsafe fn _mm_store_pd1(mem_addr: *mut f64, a: __m128d) { *(mem_addr as *mut __m128d) = b; } -/// Store 2 double-precision (64-bit) floating-point elements from `a` into +/// Stores 2 double-precision (64-bit) floating-point elements from `a` into /// memory in reverse order. /// `mem_addr` must be aligned on a 16-byte boundary or a general-protection /// exception may be generated. @@ -2692,7 +2692,7 @@ pub unsafe fn _mm_storel_pd(mem_addr: *mut f64, a: __m128d) { *mem_addr = simd_extract(a, 0); } -/// Load a double-precision (64-bit) floating-point element from memory +/// Loads a double-precision (64-bit) floating-point element from memory /// into both elements of returned vector. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load1_pd) @@ -2705,7 +2705,7 @@ pub unsafe fn _mm_load1_pd(mem_addr: *const f64) -> __m128d { _mm_setr_pd(d, d) } -/// Load a double-precision (64-bit) floating-point element from memory +/// Loads a double-precision (64-bit) floating-point element from memory /// into both elements of returned vector. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load_pd1) @@ -2717,7 +2717,7 @@ pub unsafe fn _mm_load_pd1(mem_addr: *const f64) -> __m128d { _mm_load1_pd(mem_addr) } -/// Load 2 double-precision (64-bit) floating-point elements from memory into +/// Loads 2 double-precision (64-bit) floating-point elements from memory into /// the returned vector in reverse order. `mem_addr` must be aligned on a /// 16-byte boundary or a general-protection exception may be generated. 
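A minimal round-trip sketch (not from the patch) of the unaligned store documented above together with its unaligned load counterpart; the buffer is purely illustrative.

```
#[cfg(target_arch = "x86_64")]
fn store_load_demo() {
    use std::arch::x86_64::*;
    unsafe {
        let v = _mm_setr_pd(1.0, 2.0);
        let mut buf = [0.0f64; 2];
        // `_mm_storeu_pd`/`_mm_loadu_pd` carry no alignment requirement,
        // unlike `_mm_store_pd`/`_mm_load_pd`.
        _mm_storeu_pd(buf.as_mut_ptr(), v);
        assert_eq!(buf, [1.0, 2.0]);
        let r = _mm_loadu_pd(buf.as_ptr());
        assert_eq!(_mm_cvtsd_f64(r), 1.0); // low lane round-trips
    }
}
```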
/// @@ -2731,7 +2731,7 @@ pub unsafe fn _mm_loadr_pd(mem_addr: *const f64) -> __m128d { simd_shuffle2(a, a, [1, 0]) } -/// Load 128-bits (composed of 2 packed double-precision (64-bit) +/// Loads 128-bits (composed of 2 packed double-precision (64-bit) /// floating-point elements) from memory into the returned vector. /// `mem_addr` does not need to be aligned on any particular boundary. /// @@ -2848,7 +2848,7 @@ pub unsafe fn _mm_castsi128_ps(a: __m128i) -> __m128 { mem::transmute(a) } -/// Return vector of type __m128d with undefined elements. +/// Returns vector of type __m128d with undefined elements. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_undefined_pd) #[inline] @@ -2859,7 +2859,7 @@ pub unsafe fn _mm_undefined_pd() -> __m128d { mem::MaybeUninit::<__m128d>::uninitialized().into_initialized() } -/// Return vector of type __m128i with undefined elements. +/// Returns vector of type __m128i with undefined elements. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_undefined_si128) #[inline] @@ -4797,7 +4797,7 @@ mod tests { let mut ofs = 0; let mut p = vals.as_mut_ptr(); - // Make sure p is *not* aligned to 16-byte boundary + // Make sure p is **not** aligned to 16-byte boundary if (p as usize) & 0xf == 0 { ofs = 1; p = p.offset(1); diff --git a/library/stdarch/crates/core_arch/src/x86/sse3.rs b/library/stdarch/crates/core_arch/src/x86/sse3.rs index 394909763da..d78cd43b993 100644 --- a/library/stdarch/crates/core_arch/src/x86/sse3.rs +++ b/library/stdarch/crates/core_arch/src/x86/sse3.rs @@ -31,7 +31,7 @@ pub unsafe fn _mm_addsub_pd(a: __m128d, b: __m128d) -> __m128d { addsubpd(a, b) } -/// Horizontally add adjacent pairs of double-precision (64-bit) +/// Horizontally adds adjacent pairs of double-precision (64-bit) /// floating-point elements in `a` and `b`, and pack the results. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hadd_pd) @@ -43,7 +43,7 @@ pub unsafe fn _mm_hadd_pd(a: __m128d, b: __m128d) -> __m128d { haddpd(a, b) } -/// Horizontally add adjacent pairs of single-precision (32-bit) +/// Horizontally adds adjacent pairs of single-precision (32-bit) /// floating-point elements in `a` and `b`, and pack the results. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hadd_ps) @@ -67,7 +67,7 @@ pub unsafe fn _mm_hsub_pd(a: __m128d, b: __m128d) -> __m128d { hsubpd(a, b) } -/// Horizontally add adjacent pairs of single-precision (32-bit) +/// Horizontally adds adjacent pairs of single-precision (32-bit) /// floating-point elements in `a` and `b`, and pack the results. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hsub_ps) @@ -79,7 +79,7 @@ pub unsafe fn _mm_hsub_ps(a: __m128, b: __m128) -> __m128 { hsubps(a, b) } -/// Load 128-bits of integer data from unaligned memory. +/// Loads 128-bits of integer data from unaligned memory. /// This intrinsic may perform better than `_mm_loadu_si128` /// when the data crosses a cache line boundary. /// @@ -104,7 +104,7 @@ pub unsafe fn _mm_movedup_pd(a: __m128d) -> __m128d { simd_shuffle2(a, a, [0, 0]) } -/// Load a double-precision (64-bit) floating-point element from memory +/// Loads a double-precision (64-bit) floating-point element from memory /// into both elements of return vector. 
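Illustrative only (not in the diff): the SSE3 horizontal add documented above, guarded by run-time detection because SSE3 is not part of the x86_64 baseline.

```
#[cfg(target_arch = "x86_64")]
fn hadd_demo() {
    use std::arch::x86_64::*;
    if is_x86_feature_detected!("sse3") {
        unsafe {
            let a = _mm_setr_pd(1.0, 2.0);
            let b = _mm_setr_pd(10.0, 20.0);
            // Result lane 0 = a[0] + a[1]; result lane 1 = b[0] + b[1].
            let h = _mm_hadd_pd(a, b);
            let mut out = [0.0f64; 2];
            _mm_storeu_pd(out.as_mut_ptr(), h);
            assert_eq!(out, [3.0, 30.0]);
        }
    }
}
```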
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loaddup_pd) diff --git a/library/stdarch/crates/core_arch/src/x86/sse41.rs b/library/stdarch/crates/core_arch/src/x86/sse41.rs index 10ab260d8f5..027c268de72 100644 --- a/library/stdarch/crates/core_arch/src/x86/sse41.rs +++ b/library/stdarch/crates/core_arch/src/x86/sse41.rs @@ -154,7 +154,7 @@ pub unsafe fn _mm_blend_ps(a: __m128, b: __m128, imm4: i32) -> __m128 { constify_imm4!(imm4, call) } -/// Extract a single-precision (32-bit) floating-point element from `a`, +/// Extracts a single-precision (32-bit) floating-point element from `a`, /// selected with `imm8` /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_extract_ps) @@ -170,7 +170,7 @@ pub unsafe fn _mm_extract_ps(a: __m128, imm8: i32) -> i32 { mem::transmute(simd_extract::<_, f32>(a, imm8 as u32 & 0b11)) } -/// Extract an 8-bit integer from `a`, selected with `imm8`. Returns a 32-bit +/// Extracts an 8-bit integer from `a`, selected with `imm8`. Returns a 32-bit /// integer containing the zero-extended integer data. /// /// See [LLVM commit D20468][https://reviews.llvm.org/D20468]. @@ -186,7 +186,7 @@ pub unsafe fn _mm_extract_epi8(a: __m128i, imm8: i32) -> i32 { simd_extract::<_, u8>(a.as_u8x16(), imm8) as i32 } -/// Extract an 32-bit integer from `a` selected with `imm8` +/// Extracts an 32-bit integer from `a` selected with `imm8` /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_extract_epi32) #[inline] @@ -240,7 +240,7 @@ pub unsafe fn _mm_insert_ps(a: __m128, b: __m128, imm8: i32) -> __m128 { constify_imm8!(imm8, call) } -/// Return a copy of `a` with the 8-bit integer from `i` inserted at a +/// Returns a copy of `a` with the 8-bit integer from `i` inserted at a /// location specified by `imm8`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_insert_epi8) @@ -253,7 +253,7 @@ pub unsafe fn _mm_insert_epi8(a: __m128i, i: i32, imm8: i32) -> __m128i { mem::transmute(simd_insert(a.as_i8x16(), (imm8 & 0b1111) as u32, i as i8)) } -/// Return a copy of `a` with the 32-bit integer from `i` inserted at a +/// Returns a copy of `a` with the 32-bit integer from `i` inserted at a /// location specified by `imm8`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_insert_epi32) @@ -266,7 +266,7 @@ pub unsafe fn _mm_insert_epi32(a: __m128i, i: i32, imm8: i32) -> __m128i { mem::transmute(simd_insert(a.as_i32x4(), (imm8 & 0b11) as u32, i)) } -/// Compare packed 8-bit integers in `a` and `b` and return packed maximum +/// Compares packed 8-bit integers in `a` and `b` and returns packed maximum /// values in dst. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_epi8) @@ -278,7 +278,7 @@ pub unsafe fn _mm_max_epi8(a: __m128i, b: __m128i) -> __m128i { mem::transmute(pmaxsb(a.as_i8x16(), b.as_i8x16())) } -/// Compare packed unsigned 16-bit integers in `a` and `b`, and return packed +/// Compares packed unsigned 16-bit integers in `a` and `b`, and returns packed /// maximum. 
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_epu16) @@ -290,7 +290,7 @@ pub unsafe fn _mm_max_epu16(a: __m128i, b: __m128i) -> __m128i { mem::transmute(pmaxuw(a.as_u16x8(), b.as_u16x8())) } -/// Compare packed 32-bit integers in `a` and `b`, and return packed maximum +/// Compares packed 32-bit integers in `a` and `b`, and returns packed maximum /// values. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_epi32) @@ -302,7 +302,7 @@ pub unsafe fn _mm_max_epi32(a: __m128i, b: __m128i) -> __m128i { mem::transmute(pmaxsd(a.as_i32x4(), b.as_i32x4())) } -/// Compare packed unsigned 32-bit integers in `a` and `b`, and return packed +/// Compares packed unsigned 32-bit integers in `a` and `b`, and returns packed /// maximum values. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_epu32) @@ -314,7 +314,7 @@ pub unsafe fn _mm_max_epu32(a: __m128i, b: __m128i) -> __m128i { mem::transmute(pmaxud(a.as_u32x4(), b.as_u32x4())) } -/// Compare packed 8-bit integers in `a` and `b` and return packed minimum +/// Compares packed 8-bit integers in `a` and `b` and returns packed minimum /// values in dst. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_epi8) @@ -326,7 +326,7 @@ pub unsafe fn _mm_min_epi8(a: __m128i, b: __m128i) -> __m128i { mem::transmute(pminsb(a.as_i8x16(), b.as_i8x16())) } -/// Compare packed unsigned 16-bit integers in `a` and `b`, and return packed +/// Compares packed unsigned 16-bit integers in `a` and `b`, and returns packed /// minimum. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_epu16) @@ -338,7 +338,7 @@ pub unsafe fn _mm_min_epu16(a: __m128i, b: __m128i) -> __m128i { mem::transmute(pminuw(a.as_u16x8(), b.as_u16x8())) } -/// Compare packed 32-bit integers in `a` and `b`, and return packed minimum +/// Compares packed 32-bit integers in `a` and `b`, and returns packed minimum /// values. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_epi32) @@ -350,7 +350,7 @@ pub unsafe fn _mm_min_epi32(a: __m128i, b: __m128i) -> __m128i { mem::transmute(pminsd(a.as_i32x4(), b.as_i32x4())) } -/// Compare packed unsigned 32-bit integers in `a` and `b`, and return packed +/// Compares packed unsigned 32-bit integers in `a` and `b`, and returns packed /// minimum values. 
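Not from the patch: a short sketch of the SSE4.1 packed min/max intrinsics covered above; `_mm_cvtsi128_si32` reads lane 0 so the example needs no immediate operands.

```
#[cfg(target_arch = "x86_64")]
fn minmax_demo() {
    use std::arch::x86_64::*;
    if is_x86_feature_detected!("sse4.1") {
        unsafe {
            let a = _mm_setr_epi32(1, -5, 7, 3);
            let b = _mm_setr_epi32(2, -6, 4, 3);
            // Lane-wise signed maximum and minimum; check lane 0 of each.
            assert_eq!(_mm_cvtsi128_si32(_mm_max_epi32(a, b)), 2);
            assert_eq!(_mm_cvtsi128_si32(_mm_min_epi32(a, b)), 1);
        }
    }
}
```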
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_epu32) @@ -362,7 +362,7 @@ pub unsafe fn _mm_min_epu32(a: __m128i, b: __m128i) -> __m128i { mem::transmute(pminud(a.as_u32x4(), b.as_u32x4())) } -/// Convert packed 32-bit integers from `a` and `b` to packed 16-bit integers +/// Converts packed 32-bit integers from `a` and `b` to packed 16-bit integers /// using unsigned saturation /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_packus_epi32) @@ -374,7 +374,7 @@ pub unsafe fn _mm_packus_epi32(a: __m128i, b: __m128i) -> __m128i { mem::transmute(packusdw(a.as_i32x4(), b.as_i32x4())) } -/// Compare packed 64-bit integers in `a` and `b` for equality +/// Compares packed 64-bit integers in `a` and `b` for equality /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpeq_epi64) #[inline] @@ -464,7 +464,7 @@ pub unsafe fn _mm_cvtepi32_epi64(a: __m128i) -> __m128i { mem::transmute(simd_cast::<_, i64x2>(a)) } -/// Zero extend packed unsigned 8-bit integers in `a` to packed 16-bit integers +/// Zeroes extend packed unsigned 8-bit integers in `a` to packed 16-bit integers /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepu8_epi16) #[inline] @@ -477,7 +477,7 @@ pub unsafe fn _mm_cvtepu8_epi16(a: __m128i) -> __m128i { mem::transmute(simd_cast::<_, i16x8>(a)) } -/// Zero extend packed unsigned 8-bit integers in `a` to packed 32-bit integers +/// Zeroes extend packed unsigned 8-bit integers in `a` to packed 32-bit integers /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepu8_epi32) #[inline] @@ -490,7 +490,7 @@ pub unsafe fn _mm_cvtepu8_epi32(a: __m128i) -> __m128i { mem::transmute(simd_cast::<_, i32x4>(a)) } -/// Zero extend packed unsigned 8-bit integers in `a` to packed 64-bit integers +/// Zeroes extend packed unsigned 8-bit integers in `a` to packed 64-bit integers /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepu8_epi64) #[inline] @@ -503,7 +503,7 @@ pub unsafe fn _mm_cvtepu8_epi64(a: __m128i) -> __m128i { mem::transmute(simd_cast::<_, i64x2>(a)) } -/// Zero extend packed unsigned 16-bit integers in `a` +/// Zeroes extend packed unsigned 16-bit integers in `a` /// to packed 32-bit integers /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepu16_epi32) @@ -517,7 +517,7 @@ pub unsafe fn _mm_cvtepu16_epi32(a: __m128i) -> __m128i { mem::transmute(simd_cast::<_, i32x4>(a)) } -/// Zero extend packed unsigned 16-bit integers in `a` +/// Zeroes extend packed unsigned 16-bit integers in `a` /// to packed 64-bit integers /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepu16_epi64) @@ -531,7 +531,7 @@ pub unsafe fn _mm_cvtepu16_epi64(a: __m128i) -> __m128i { mem::transmute(simd_cast::<_, i64x2>(a)) } -/// Zero extend packed unsigned 32-bit integers in `a` +/// Zeroes extend packed unsigned 32-bit integers in `a` /// to packed 64-bit integers /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepu32_epi64) @@ -592,7 +592,7 @@ pub unsafe fn _mm_dp_ps(a: __m128, b: __m128, imm8: i32) -> __m128 { } /// Round the packed double-precision (64-bit) floating-point elements in `a` -/// down to an integer value, and store 
the results as packed double-precision +/// down to an integer value, and stores the results as packed double-precision /// floating-point elements. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_floor_pd) @@ -605,7 +605,7 @@ pub unsafe fn _mm_floor_pd(a: __m128d) -> __m128d { } /// Round the packed single-precision (32-bit) floating-point elements in `a` -/// down to an integer value, and store the results as packed single-precision +/// down to an integer value, and stores the results as packed single-precision /// floating-point elements. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_floor_ps) @@ -620,7 +620,7 @@ pub unsafe fn _mm_floor_ps(a: __m128) -> __m128 { /// Round the lower double-precision (64-bit) floating-point element in `b` /// down to an integer value, store the result as a double-precision /// floating-point element in the lower element of the intrinsic result, -/// and copy the upper element from `a` to the upper element of the intrinsic +/// and copies the upper element from `a` to the upper element of the intrinsic /// result. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_floor_sd) @@ -635,7 +635,7 @@ pub unsafe fn _mm_floor_sd(a: __m128d, b: __m128d) -> __m128d { /// Round the lower single-precision (32-bit) floating-point element in `b` /// down to an integer value, store the result as a single-precision /// floating-point element in the lower element of the intrinsic result, -/// and copy the upper 3 packed elements from `a` to the upper elements +/// and copies the upper 3 packed elements from `a` to the upper elements /// of the intrinsic result. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_floor_ss) @@ -648,7 +648,7 @@ pub unsafe fn _mm_floor_ss(a: __m128, b: __m128) -> __m128 { } /// Round the packed double-precision (64-bit) floating-point elements in `a` -/// up to an integer value, and store the results as packed double-precision +/// up to an integer value, and stores the results as packed double-precision /// floating-point elements. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ceil_pd) @@ -661,7 +661,7 @@ pub unsafe fn _mm_ceil_pd(a: __m128d) -> __m128d { } /// Round the packed single-precision (32-bit) floating-point elements in `a` -/// up to an integer value, and store the results as packed single-precision +/// up to an integer value, and stores the results as packed single-precision /// floating-point elements. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ceil_ps) @@ -676,7 +676,7 @@ pub unsafe fn _mm_ceil_ps(a: __m128) -> __m128 { /// Round the lower double-precision (64-bit) floating-point element in `b` /// up to an integer value, store the result as a double-precision /// floating-point element in the lower element of the intrisic result, -/// and copy the upper element from `a` to the upper element +/// and copies the upper element from `a` to the upper element /// of the intrinsic result. 
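A hedged sketch (not part of the patch) of the SSE4.1 packed floor/ceil rounding described above; the helper name is illustrative.

```
#[cfg(target_arch = "x86_64")]
fn floor_ceil_demo() {
    use std::arch::x86_64::*;
    if is_x86_feature_detected!("sse4.1") {
        unsafe {
            let v = _mm_setr_pd(1.2, -1.2);
            let (mut f, mut c) = ([0.0f64; 2], [0.0f64; 2]);
            _mm_storeu_pd(f.as_mut_ptr(), _mm_floor_pd(v));
            _mm_storeu_pd(c.as_mut_ptr(), _mm_ceil_pd(v));
            assert_eq!(f, [1.0, -2.0]); // rounded toward negative infinity
            assert_eq!(c, [2.0, -1.0]); // rounded toward positive infinity
        }
    }
}
```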
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ceil_sd) @@ -691,7 +691,7 @@ pub unsafe fn _mm_ceil_sd(a: __m128d, b: __m128d) -> __m128d { /// Round the lower single-precision (32-bit) floating-point element in `b` /// up to an integer value, store the result as a single-precision /// floating-point element in the lower element of the intrinsic result, -/// and copy the upper 3 packed elements from `a` to the upper elements +/// and copies the upper 3 packed elements from `a` to the upper elements /// of the intrinsic result. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ceil_ss) @@ -704,7 +704,7 @@ pub unsafe fn _mm_ceil_ss(a: __m128, b: __m128) -> __m128 { } /// Round the packed double-precision (64-bit) floating-point elements in `a` -/// using the `rounding` parameter, and store the results as packed +/// using the `rounding` parameter, and stores the results as packed /// double-precision floating-point elements. /// Rounding is done according to the rounding parameter, which can be one of: /// @@ -749,7 +749,7 @@ pub unsafe fn _mm_round_pd(a: __m128d, rounding: i32) -> __m128d { } /// Round the packed single-precision (32-bit) floating-point elements in `a` -/// using the `rounding` parameter, and store the results as packed +/// using the `rounding` parameter, and stores the results as packed /// single-precision floating-point elements. /// Rounding is done according to the rounding parameter, which can be one of: /// @@ -796,7 +796,7 @@ pub unsafe fn _mm_round_ps(a: __m128, rounding: i32) -> __m128 { /// Round the lower double-precision (64-bit) floating-point element in `b` /// using the `rounding` parameter, store the result as a double-precision /// floating-point element in the lower element of the intrinsic result, -/// and copy the upper element from `a` to the upper element of the intrinsic +/// and copies the upper element from `a` to the upper element of the intrinsic /// result. /// Rounding is done according to the rounding parameter, which can be one of: /// @@ -843,7 +843,7 @@ pub unsafe fn _mm_round_sd(a: __m128d, b: __m128d, rounding: i32) -> __m128d { /// Round the lower single-precision (32-bit) floating-point element in `b` /// using the `rounding` parameter, store the result as a single-precision /// floating-point element in the lower element of the intrinsic result, -/// and copy the upper 3 packed elements from `a` to the upper elements +/// and copies the upper 3 packed elements from `a` to the upper elements /// of the instrinsic result. /// Rounding is done according to the rounding parameter, which can be one of: /// @@ -916,8 +916,8 @@ pub unsafe fn _mm_minpos_epu16(a: __m128i) -> __m128i { mem::transmute(phminposuw(a.as_u16x8())) } -/// Multiply the low 32-bit integers from each packed 64-bit -/// element in `a` and `b`, and return the signed 64-bit result. +/// Multiplies the low 32-bit integers from each packed 64-bit +/// element in `a` and `b`, and returns the signed 64-bit result. 
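Not in the diff: a sketch of `_mm_mul_epi32` as just described, which multiplies only the low 32-bit integer of each 64-bit element and widens the products to 64 bits.

```
#[cfg(target_arch = "x86_64")]
fn widening_mul_demo() {
    use std::arch::x86_64::*;
    if is_x86_feature_detected!("sse4.1") {
        unsafe {
            let a = _mm_setr_epi32(-3, 0, 7, 0);
            let b = _mm_setr_epi32(4, 0, -5, 0);
            // 64-bit lanes of the product: [-3 * 4, 7 * -5] = [-12, -35].
            let p = _mm_mul_epi32(a, b);
            assert_eq!(_mm_cvtsi128_si64(p), -12); // low 64-bit lane
        }
    }
}
```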
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_epi32) #[inline] @@ -928,7 +928,7 @@ pub unsafe fn _mm_mul_epi32(a: __m128i, b: __m128i) -> __m128i { mem::transmute(pmuldq(a.as_i32x4(), b.as_i32x4())) } -/// Multiply the packed 32-bit integers in `a` and `b`, producing intermediate +/// Multiplies the packed 32-bit integers in `a` and `b`, producing intermediate /// 64-bit integers, and returns the lowest 32-bit, whatever they might be, /// reinterpreted as a signed integer. While `pmulld __m128i::splat(2), /// __m128i::splat(2)` returns the obvious `__m128i::splat(4)`, due to wrapping diff --git a/library/stdarch/crates/core_arch/src/x86/sse42.rs b/library/stdarch/crates/core_arch/src/x86/sse42.rs index 5862673781f..0eae371e344 100644 --- a/library/stdarch/crates/core_arch/src/x86/sse42.rs +++ b/library/stdarch/crates/core_arch/src/x86/sse42.rs @@ -39,13 +39,13 @@ pub const _SIDD_CMP_EQUAL_ORDERED: i32 = 0b0000_1100; /// Do not negate results *(Default)* #[stable(feature = "simd_x86", since = "1.27.0")] pub const _SIDD_POSITIVE_POLARITY: i32 = 0b0000_0000; -/// Negate results +/// Negates results #[stable(feature = "simd_x86", since = "1.27.0")] pub const _SIDD_NEGATIVE_POLARITY: i32 = 0b0001_0000; /// Do not negate results before the end of the string #[stable(feature = "simd_x86", since = "1.27.0")] pub const _SIDD_MASKED_POSITIVE_POLARITY: i32 = 0b0010_0000; -/// Negate results only before the end of the string +/// Negates results only before the end of the string #[stable(feature = "simd_x86", since = "1.27.0")] pub const _SIDD_MASKED_NEGATIVE_POLARITY: i32 = 0b0011_0000; @@ -63,7 +63,7 @@ pub const _SIDD_BIT_MASK: i32 = 0b0000_0000; #[stable(feature = "simd_x86", since = "1.27.0")] pub const _SIDD_UNIT_MASK: i32 = 0b0100_0000; -/// Compare packed strings with implicit lengths in `a` and `b` using the +/// Compares packed strings with implicit lengths in `a` and `b` using the /// control in `imm8`, and return the generated mask. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpistrm) @@ -83,7 +83,7 @@ pub unsafe fn _mm_cmpistrm(a: __m128i, b: __m128i, imm8: i32) -> __m128i { mem::transmute(constify_imm8!(imm8, call)) } -/// Compare packed strings with implicit lengths in `a` and `b` using the +/// Compares packed strings with implicit lengths in `a` and `b` using the /// control in `imm8` and return the generated index. Similar to /// [`_mm_cmpestri`] with the exception that [`_mm_cmpestri`] requires the /// lengths of `a` and `b` to be explicitly specified. @@ -115,7 +115,7 @@ pub unsafe fn _mm_cmpistrm(a: __m128i, b: __m128i, imm8: i32) -> __m128i { /// /// # Examples /// -/// Find a substring using [`_SIDD_CMP_EQUAL_ORDERED`] +/// Finds a substring using [`_SIDD_CMP_EQUAL_ORDERED`] /// /// ``` /// #[cfg(target_arch = "x86")] @@ -187,7 +187,7 @@ pub unsafe fn _mm_cmpistrm(a: __m128i, b: __m128i, imm8: i32) -> __m128i { /// # } /// ``` /// -/// Find the index of the first character in the haystack that is within a +/// Finds the index of the first character in the haystack that is within a /// range of characters. /// /// ``` @@ -291,7 +291,7 @@ pub unsafe fn _mm_cmpistri(a: __m128i, b: __m128i, imm8: i32) -> i32 { constify_imm8!(imm8, call) } -/// Compare packed strings with implicit lengths in `a` and `b` using the +/// Compares packed strings with implicit lengths in `a` and `b` using the /// control in `imm8`, and return `1` if any character in `b` was null. 
/// and `0` otherwise. /// @@ -312,7 +312,7 @@ pub unsafe fn _mm_cmpistrz(a: __m128i, b: __m128i, imm8: i32) -> i32 { constify_imm8!(imm8, call) } -/// Compare packed strings with implicit lengths in `a` and `b` using the +/// Compares packed strings with implicit lengths in `a` and `b` using the /// control in `imm8`, and return `1` if the resulting mask was non-zero, /// and `0` otherwise. /// @@ -333,7 +333,7 @@ pub unsafe fn _mm_cmpistrc(a: __m128i, b: __m128i, imm8: i32) -> i32 { constify_imm8!(imm8, call) } -/// Compare packed strings with implicit lengths in `a` and `b` using the +/// Compares packed strings with implicit lengths in `a` and `b` using the /// control in `imm8`, and returns `1` if any character in `a` was null, /// and `0` otherwise. /// @@ -354,7 +354,7 @@ pub unsafe fn _mm_cmpistrs(a: __m128i, b: __m128i, imm8: i32) -> i32 { constify_imm8!(imm8, call) } -/// Compare packed strings with implicit lengths in `a` and `b` using the +/// Compares packed strings with implicit lengths in `a` and `b` using the /// control in `imm8`, and return bit `0` of the resulting bit mask. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpistro) @@ -374,7 +374,7 @@ pub unsafe fn _mm_cmpistro(a: __m128i, b: __m128i, imm8: i32) -> i32 { constify_imm8!(imm8, call) } -/// Compare packed strings with implicit lengths in `a` and `b` using the +/// Compares packed strings with implicit lengths in `a` and `b` using the /// control in `imm8`, and return `1` if `b` did not contain a null /// character and the resulting mask was zero, and `0` otherwise. /// @@ -395,7 +395,7 @@ pub unsafe fn _mm_cmpistra(a: __m128i, b: __m128i, imm8: i32) -> i32 { constify_imm8!(imm8, call) } -/// Compare packed strings in `a` and `b` with lengths `la` and `lb` +/// Compares packed strings in `a` and `b` with lengths `la` and `lb` /// using the control in `imm8`, and return the generated mask. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpestrm) @@ -415,7 +415,7 @@ pub unsafe fn _mm_cmpestrm(a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32) mem::transmute(constify_imm8!(imm8, call)) } -/// Compare packed strings `a` and `b` with lengths `la` and `lb` using the +/// Compares packed strings `a` and `b` with lengths `la` and `lb` using the /// control in `imm8` and return the generated index. Similar to /// [`_mm_cmpistri`] with the exception that [`_mm_cmpistri`] implicitly /// determines the length of `a` and `b`. @@ -510,7 +510,7 @@ pub unsafe fn _mm_cmpestri(a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32) constify_imm8!(imm8, call) } -/// Compare packed strings in `a` and `b` with lengths `la` and `lb` +/// Compares packed strings in `a` and `b` with lengths `la` and `lb` /// using the control in `imm8`, and return `1` if any character in /// `b` was null, and `0` otherwise. /// @@ -531,7 +531,7 @@ pub unsafe fn _mm_cmpestrz(a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32) constify_imm8!(imm8, call) } -/// Compare packed strings in `a` and `b` with lengths `la` and `lb` +/// Compares packed strings in `a` and `b` with lengths `la` and `lb` /// using the control in `imm8`, and return `1` if the resulting mask /// was non-zero, and `0` otherwise. 
/// @@ -552,7 +552,7 @@ pub unsafe fn _mm_cmpestrc(a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32) constify_imm8!(imm8, call) } -/// Compare packed strings in `a` and `b` with lengths `la` and `lb` +/// Compares packed strings in `a` and `b` with lengths `la` and `lb` /// using the control in `imm8`, and return `1` if any character in /// a was null, and `0` otherwise. /// @@ -573,7 +573,7 @@ pub unsafe fn _mm_cmpestrs(a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32) constify_imm8!(imm8, call) } -/// Compare packed strings in `a` and `b` with lengths `la` and `lb` +/// Compares packed strings in `a` and `b` with lengths `la` and `lb` /// using the control in `imm8`, and return bit `0` of the resulting /// bit mask. /// @@ -594,7 +594,7 @@ pub unsafe fn _mm_cmpestro(a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32) constify_imm8!(imm8, call) } -/// Compare packed strings in `a` and `b` with lengths `la` and `lb` +/// Compares packed strings in `a` and `b` with lengths `la` and `lb` /// using the control in `imm8`, and return `1` if `b` did not /// contain a null character and the resulting mask was zero, and `0` /// otherwise. @@ -652,7 +652,7 @@ pub unsafe fn _mm_crc32_u32(crc: u32, v: u32) -> u32 { crc32_32_32(crc, v) } -/// Compare packed 64-bit integers in `a` and `b` for greater-than, +/// Compares packed 64-bit integers in `a` and `b` for greater-than, /// return the results. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpgt_epi64) diff --git a/library/stdarch/crates/core_arch/src/x86/ssse3.rs b/library/stdarch/crates/core_arch/src/x86/ssse3.rs index a013ab6551a..c3d78571a5e 100644 --- a/library/stdarch/crates/core_arch/src/x86/ssse3.rs +++ b/library/stdarch/crates/core_arch/src/x86/ssse3.rs @@ -8,7 +8,7 @@ use mem; #[cfg(test)] use stdsimd_test::assert_instr; -/// Compute the absolute value of packed 8-bit signed integers in `a` and +/// Computes the absolute value of packed 8-bit signed integers in `a` and /// return the unsigned results. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_abs_epi8) @@ -20,7 +20,7 @@ pub unsafe fn _mm_abs_epi8(a: __m128i) -> __m128i { mem::transmute(pabsb128(a.as_i8x16())) } -/// Compute the absolute value of each of the packed 16-bit signed integers in +/// Computes the absolute value of each of the packed 16-bit signed integers in /// `a` and /// return the 16-bit unsigned integer /// @@ -33,7 +33,7 @@ pub unsafe fn _mm_abs_epi16(a: __m128i) -> __m128i { mem::transmute(pabsw128(a.as_i16x8())) } -/// Compute the absolute value of each of the packed 32-bit signed integers in +/// Computes the absolute value of each of the packed 32-bit signed integers in /// `a` and /// return the 32-bit unsigned integer /// @@ -46,7 +46,7 @@ pub unsafe fn _mm_abs_epi32(a: __m128i) -> __m128i { mem::transmute(pabsd128(a.as_i32x4())) } -/// Shuffle bytes from `a` according to the content of `b`. +/// Shuffles bytes from `a` according to the content of `b`. /// /// The last 4 bits of each byte of `b` are used as addresses /// into the 16 bytes of `a`. @@ -81,7 +81,7 @@ pub unsafe fn _mm_shuffle_epi8(a: __m128i, b: __m128i) -> __m128i { } /// Concatenate 16-byte blocks in `a` and `b` into a 32-byte temporary result, -/// shift the result right by `n` bytes, and return the low 16 bytes. +/// shift the result right by `n` bytes, and returns the low 16 bytes. 
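Illustration only (not from the patch): `_mm_shuffle_epi8` as documented above, using the low four bits of each control byte as a source index, here to reverse the sixteen bytes of `a`.

```
#[cfg(target_arch = "x86_64")]
fn shuffle_demo() {
    use std::arch::x86_64::*;
    if is_x86_feature_detected!("ssse3") {
        unsafe {
            let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
            let idx = _mm_setr_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
            let rev = _mm_shuffle_epi8(a, idx);
            // Byte 0 of the result is byte 15 of `a`.
            assert_eq!(_mm_cvtsi128_si32(rev) & 0xff, 15);
        }
    }
}
```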
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_alignr_epi8) #[inline] @@ -154,7 +154,7 @@ pub unsafe fn _mm_alignr_epi8(a: __m128i, b: __m128i, n: i32) -> __m128i { mem::transmute(r) } -/// Horizontally add the adjacent pairs of values contained in 2 packed +/// Horizontally adds the adjacent pairs of values contained in 2 packed /// 128-bit vectors of `[8 x i16]`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hadd_epi16) @@ -166,7 +166,7 @@ pub unsafe fn _mm_hadd_epi16(a: __m128i, b: __m128i) -> __m128i { mem::transmute(phaddw128(a.as_i16x8(), b.as_i16x8())) } -/// Horizontally add the adjacent pairs of values contained in 2 packed +/// Horizontally adds the adjacent pairs of values contained in 2 packed /// 128-bit vectors of `[8 x i16]`. Positive sums greater than 7FFFh are /// saturated to 7FFFh. Negative sums less than 8000h are saturated to 8000h. /// @@ -179,7 +179,7 @@ pub unsafe fn _mm_hadds_epi16(a: __m128i, b: __m128i) -> __m128i { mem::transmute(phaddsw128(a.as_i16x8(), b.as_i16x8())) } -/// Horizontally add the adjacent pairs of values contained in 2 packed +/// Horizontally adds the adjacent pairs of values contained in 2 packed /// 128-bit vectors of `[4 x i32]`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hadd_epi32) @@ -229,7 +229,7 @@ pub unsafe fn _mm_hsub_epi32(a: __m128i, b: __m128i) -> __m128i { mem::transmute(phsubd128(a.as_i32x4(), b.as_i32x4())) } -/// Multiply corresponding pairs of packed 8-bit unsigned integer +/// Multiplies corresponding pairs of packed 8-bit unsigned integer /// values contained in the first source operand and packed 8-bit signed /// integer values contained in the second source operand, add pairs of /// contiguous products with signed saturation, and writes the 16-bit sums to @@ -244,7 +244,7 @@ pub unsafe fn _mm_maddubs_epi16(a: __m128i, b: __m128i) -> __m128i { mem::transmute(pmaddubsw128(a.as_u8x16(), b.as_i8x16())) } -/// Multiply packed 16-bit signed integer values, truncate the 32-bit +/// Multiplies packed 16-bit signed integer values, truncate the 32-bit /// product to the 18 most significant bits by right-shifting, round the /// truncated value by adding 1, and write bits `[16:1]` to the destination. /// @@ -257,8 +257,8 @@ pub unsafe fn _mm_mulhrs_epi16(a: __m128i, b: __m128i) -> __m128i { mem::transmute(pmulhrsw128(a.as_i16x8(), b.as_i16x8())) } -/// Negate packed 8-bit integers in `a` when the corresponding signed 8-bit -/// integer in `b` is negative, and return the result. +/// Negates packed 8-bit integers in `a` when the corresponding signed 8-bit +/// integer in `b` is negative, and returns the result. /// Elements in result are zeroed out when the corresponding element in `b` /// is zero. /// @@ -271,8 +271,8 @@ pub unsafe fn _mm_sign_epi8(a: __m128i, b: __m128i) -> __m128i { mem::transmute(psignb128(a.as_i8x16(), b.as_i8x16())) } -/// Negate packed 16-bit integers in `a` when the corresponding signed 16-bit -/// integer in `b` is negative, and return the results. +/// Negates packed 16-bit integers in `a` when the corresponding signed 16-bit +/// integer in `b` is negative, and returns the results. /// Elements in result are zeroed out when the corresponding element in `b` /// is zero. 
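Not from the patch: a sketch of the `_mm_sign_*` family described above, which negates, passes through, or zeroes each lane of `a` depending on the sign of the matching lane in `b`.

```
#[cfg(target_arch = "x86_64")]
fn sign_demo() {
    use std::arch::x86_64::*;
    if is_x86_feature_detected!("ssse3") {
        unsafe {
            let a = _mm_setr_epi16(10, 20, 30, 40, 50, 60, 70, 80);
            let b = _mm_setr_epi16(-1, 0, 1, -1, 0, 1, -1, 0);
            let r = _mm_sign_epi16(a, b);
            // Lane 0 of `b` is negative, so lane 0 of `a` is negated.
            assert_eq!(_mm_cvtsi128_si32(r) as i16, -10);
        }
    }
}
```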
/// @@ -285,8 +285,8 @@ pub unsafe fn _mm_sign_epi16(a: __m128i, b: __m128i) -> __m128i { mem::transmute(psignw128(a.as_i16x8(), b.as_i16x8())) } -/// Negate packed 32-bit integers in `a` when the corresponding signed 32-bit -/// integer in `b` is negative, and return the results. +/// Negates packed 32-bit integers in `a` when the corresponding signed 32-bit +/// integer in `b` is negative, and returns the results. /// Element in result are zeroed out when the corresponding element in `b` /// is zero. /// @@ -299,7 +299,7 @@ pub unsafe fn _mm_sign_epi32(a: __m128i, b: __m128i) -> __m128i { mem::transmute(psignd128(a.as_i32x4(), b.as_i32x4())) } -/// Compute the absolute value of packed 8-bit integers in `a` and +/// Computes the absolute value of packed 8-bit integers in `a` and /// return the unsigned results. #[inline] #[target_feature(enable = "ssse3,mmx")] @@ -308,7 +308,7 @@ pub unsafe fn _mm_abs_pi8(a: __m64) -> __m64 { pabsb(a) } -/// Compute the absolute value of packed 8-bit integers in `a`, and return the +/// Computes the absolute value of packed 8-bit integers in `a`, and returns the /// unsigned results. #[inline] #[target_feature(enable = "ssse3,mmx")] @@ -317,7 +317,7 @@ pub unsafe fn _mm_abs_pi16(a: __m64) -> __m64 { pabsw(a) } -/// Compute the absolute value of packed 32-bit integers in `a`, and return the +/// Computes the absolute value of packed 32-bit integers in `a`, and returns the /// unsigned results. #[inline] #[target_feature(enable = "ssse3,mmx")] @@ -326,8 +326,8 @@ pub unsafe fn _mm_abs_pi32(a: __m64) -> __m64 { pabsd(a) } -/// Shuffle packed 8-bit integers in `a` according to shuffle control mask in -/// the corresponding 8-bit element of `b`, and return the results +/// Shuffles packed 8-bit integers in `a` according to shuffle control mask in +/// the corresponding 8-bit element of `b`, and returns the results #[inline] #[target_feature(enable = "ssse3,mmx")] #[cfg_attr(test, assert_instr(pshufb))] @@ -350,7 +350,7 @@ pub unsafe fn _mm_alignr_pi8(a: __m64, b: __m64, n: i32) -> __m64 { constify_imm8!(n, call) } -/// Horizontally add the adjacent pairs of values contained in 2 packed +/// Horizontally adds the adjacent pairs of values contained in 2 packed /// 64-bit vectors of `[4 x i16]`. #[inline] #[target_feature(enable = "ssse3,mmx")] @@ -359,7 +359,7 @@ pub unsafe fn _mm_hadd_pi16(a: __m64, b: __m64) -> __m64 { phaddw(a, b) } -/// Horizontally add the adjacent pairs of values contained in 2 packed +/// Horizontally adds the adjacent pairs of values contained in 2 packed /// 64-bit vectors of `[2 x i32]`. #[inline] #[target_feature(enable = "ssse3,mmx")] @@ -368,7 +368,7 @@ pub unsafe fn _mm_hadd_pi32(a: __m64, b: __m64) -> __m64 { phaddd(a, b) } -/// Horizontally add the adjacent pairs of values contained in 2 packed +/// Horizontally adds the adjacent pairs of values contained in 2 packed /// 64-bit vectors of `[4 x i16]`. Positive sums greater than 7FFFh are /// saturated to 7FFFh. Negative sums less than 8000h are saturated to 8000h. #[inline] @@ -429,8 +429,8 @@ pub unsafe fn _mm_mulhrs_pi16(a: __m64, b: __m64) -> __m64 { pmulhrsw(a, b) } -/// Negate packed 8-bit integers in `a` when the corresponding signed 8-bit -/// integer in `b` is negative, and return the results. +/// Negates packed 8-bit integers in `a` when the corresponding signed 8-bit +/// integer in `b` is negative, and returns the results. /// Element in result are zeroed out when the corresponding element in `b` is /// zero. 
#[inline] @@ -440,8 +440,8 @@ pub unsafe fn _mm_sign_pi8(a: __m64, b: __m64) -> __m64 { psignb(a, b) } -/// Negate packed 16-bit integers in `a` when the corresponding signed 16-bit -/// integer in `b` is negative, and return the results. +/// Negates packed 16-bit integers in `a` when the corresponding signed 16-bit +/// integer in `b` is negative, and returns the results. /// Element in result are zeroed out when the corresponding element in `b` is /// zero. #[inline] @@ -451,8 +451,8 @@ pub unsafe fn _mm_sign_pi16(a: __m64, b: __m64) -> __m64 { psignw(a, b) } -/// Negate packed 32-bit integers in `a` when the corresponding signed 32-bit -/// integer in `b` is negative, and return the results. +/// Negates packed 32-bit integers in `a` when the corresponding signed 32-bit +/// integer in `b` is negative, and returns the results. /// Element in result are zeroed out when the corresponding element in `b` is /// zero. #[inline] diff --git a/library/stdarch/crates/core_arch/src/x86/xsave.rs b/library/stdarch/crates/core_arch/src/x86/xsave.rs index 4c7f5338a96..35f00e230b5 100644 --- a/library/stdarch/crates/core_arch/src/x86/xsave.rs +++ b/library/stdarch/crates/core_arch/src/x86/xsave.rs @@ -23,7 +23,7 @@ extern "C" { fn xrstors(p: *const u8, hi: u32, lo: u32) -> (); } -/// Perform a full or partial save of the enabled processor states to memory at +/// Performs a full or partial save of the enabled processor states to memory at /// `mem_addr`. /// /// State is saved based on bits `[62:0]` in `save_mask` and XCR0. @@ -41,7 +41,7 @@ pub unsafe fn _xsave(mem_addr: *mut u8, save_mask: u64) { xsave(mem_addr, (save_mask >> 32) as u32, save_mask as u32); } -/// Perform a full or partial restore of the enabled processor states using +/// Performs a full or partial restore of the enabled processor states using /// the state information stored in memory at `mem_addr`. /// /// State is restored based on bits `[62:0]` in `rs_mask`, `XCR0`, and @@ -63,7 +63,7 @@ pub unsafe fn _xrstor(mem_addr: *const u8, rs_mask: u64) { #[stable(feature = "simd_x86", since = "1.27.0")] pub const _XCR_XFEATURE_ENABLED_MASK: u32 = 0; -/// Copy 64-bits from `val` to the extended control register (`XCR`) specified +/// Copies 64-bits from `val` to the extended control register (`XCR`) specified /// by `a`. /// /// Currently only `XFEATURE_ENABLED_MASK` `XCR` is supported. @@ -92,7 +92,7 @@ pub unsafe fn _xgetbv(xcr_no: u32) -> u64 { ((edx as u64) << 32) | (eax as u64) } -/// Perform a full or partial save of the enabled processor states to memory at +/// Performs a full or partial save of the enabled processor states to memory at /// `mem_addr`. /// /// State is saved based on bits `[62:0]` in `save_mask` and `XCR0`. @@ -109,7 +109,7 @@ pub unsafe fn _xsaveopt(mem_addr: *mut u8, save_mask: u64) { xsaveopt(mem_addr, (save_mask >> 32) as u32, save_mask as u32); } -/// Perform a full or partial save of the enabled processor states to memory +/// Performs a full or partial save of the enabled processor states to memory /// at `mem_addr`. 
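A hedged sketch (not part of the diff) of reading `XCR0` with `_xgetbv`, documented above, to check that the OS saves SSE and AVX register state; the bit positions (1 for XMM, 2 for YMM) are taken from the Intel SDM and the helper name is illustrative.

```
#[cfg(target_arch = "x86_64")]
fn os_saves_avx_state() -> bool {
    use std::arch::x86_64::*;
    if is_x86_feature_detected!("xsave") {
        // Assumes the OS has set CR4.OSXSAVE, which mainstream OSes do.
        // XCR0 bit 1 = XMM state, bit 2 = YMM state.
        let xcr0 = unsafe { _xgetbv(_XCR_XFEATURE_ENABLED_MASK) };
        (xcr0 & 0b110) == 0b110
    } else {
        false
    }
}
```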
/// /// `xsavec` differs from `xsave` in that it uses compaction and that it may @@ -125,7 +125,7 @@ pub unsafe fn _xsavec(mem_addr: *mut u8, save_mask: u64) { xsavec(mem_addr, (save_mask >> 32) as u32, save_mask as u32); } -/// Perform a full or partial save of the enabled processor states to memory at +/// Performs a full or partial save of the enabled processor states to memory at /// `mem_addr` /// /// `xsaves` differs from xsave in that it can save state components @@ -142,7 +142,7 @@ pub unsafe fn _xsaves(mem_addr: *mut u8, save_mask: u64) { xsaves(mem_addr, (save_mask >> 32) as u32, save_mask as u32); } -/// Perform a full or partial restore of the enabled processor states using the +/// Performs a full or partial restore of the enabled processor states using the /// state information stored in memory at `mem_addr`. /// /// `xrstors` differs from `xrstor` in that it can restore state components diff --git a/library/stdarch/crates/core_arch/src/x86_64/adx.rs b/library/stdarch/crates/core_arch/src/x86_64/adx.rs index 812fff0b865..761b0e25e57 100644 --- a/library/stdarch/crates/core_arch/src/x86_64/adx.rs +++ b/library/stdarch/crates/core_arch/src/x86_64/adx.rs @@ -11,8 +11,8 @@ extern "unadjusted" { fn llvm_subborrow_u64(a: u8, b: u64, c: u64) -> (u8, u64); } -/// Add unsigned 64-bit integers a and b with unsigned 8-bit carry-in `c_in` -/// (carry flag), and store the unsigned 64-bit result in out, and the carry-out +/// Adds unsigned 64-bit integers `a` and `b` with unsigned 8-bit carry-in `c_in` +/// (carry flag), and store the unsigned 64-bit result in `out`, and the carry-out /// is returned (carry or overflow flag). #[inline] #[cfg_attr(test, assert_instr(adc))] @@ -23,8 +23,8 @@ pub unsafe fn _addcarry_u64(c_in: u8, a: u64, b: u64, out: &mut u64) -> u8 { a } -/// Add unsigned 64-bit integers a and b with unsigned 8-bit carry-in `c_in` -/// (carry or overflow flag), and store the unsigned 64-bit result in out, and +/// Adds unsigned 64-bit integers `a` and `b` with unsigned 8-bit carry-in `c_in` +/// (carry or overflow flag), and store the unsigned 64-bit result in `out`, and /// the carry-out is returned (carry or overflow flag). #[inline] #[target_feature(enable = "adx")] @@ -35,8 +35,8 @@ pub unsafe fn _addcarryx_u64(c_in: u8, a: u64, b: u64, out: &mut u64) -> u8 { llvm_addcarryx_u64(c_in, a, b, out as *mut _ as *mut u8) } -/// Add unsigned 64-bit integers a and b with unsigned 8-bit carry-in `c_in` -/// (carry or overflow flag), and store the unsigned 64-bit result in out, and +/// Adds unsigned 64-bit integers `a` and `b` with unsigned 8-bit carry-in `c_in`. +/// (carry or overflow flag), and store the unsigned 64-bit result in `out`, and /// the carry-out is returned (carry or overflow flag). #[inline] #[cfg_attr(test, assert_instr(sbb))] diff --git a/library/stdarch/crates/core_arch/src/x86_64/avx.rs b/library/stdarch/crates/core_arch/src/x86_64/avx.rs index 429ee75c59a..bd4414a733b 100644 --- a/library/stdarch/crates/core_arch/src/x86_64/avx.rs +++ b/library/stdarch/crates/core_arch/src/x86_64/avx.rs @@ -17,7 +17,7 @@ use core_arch::simd_llvm::*; use core_arch::x86::*; use mem; -/// Copy `a` to result, and insert the 64-bit integer `i` into result +/// Copies `a` to result, and insert the 64-bit integer `i` into result /// at the location specified by `index`. 
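Not part of the patch: a minimal sketch of chaining `_addcarry_u64`, documented above, to add two 128-bit integers held as little-endian `u64` limbs (the `add128` helper is a made-up name).

```
#[cfg(target_arch = "x86_64")]
fn add128(a: [u64; 2], b: [u64; 2]) -> ([u64; 2], u8) {
    use std::arch::x86_64::_addcarry_u64;
    let (mut lo, mut hi) = (0u64, 0u64);
    unsafe {
        // The returned u8 is the carry-out, fed into the next limb.
        let c = _addcarry_u64(0, a[0], b[0], &mut lo);
        let c = _addcarry_u64(c, a[1], b[1], &mut hi);
        ([lo, hi], c)
    }
}
```

For instance, `add128([u64::MAX, 0], [1, 0])` yields `([0, 1], 0)`: the low limb wraps and the carry propagates into the high limb.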
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_insert_epi64) diff --git a/library/stdarch/crates/core_arch/src/x86_64/avx2.rs b/library/stdarch/crates/core_arch/src/x86_64/avx2.rs index a27f3125751..deab866f721 100644 --- a/library/stdarch/crates/core_arch/src/x86_64/avx2.rs +++ b/library/stdarch/crates/core_arch/src/x86_64/avx2.rs @@ -21,7 +21,7 @@ use core_arch::simd_llvm::*; use core_arch::x86::*; -/// Extract a 64-bit integer from `a`, selected with `imm8`. +/// Extracts a 64-bit integer from `a`, selected with `imm8`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_extract_epi64) #[inline] diff --git a/library/stdarch/crates/core_arch/src/x86_64/bmi.rs b/library/stdarch/crates/core_arch/src/x86_64/bmi.rs index be3ced9e251..dc86cb0cb22 100644 --- a/library/stdarch/crates/core_arch/src/x86_64/bmi.rs +++ b/library/stdarch/crates/core_arch/src/x86_64/bmi.rs @@ -52,7 +52,7 @@ pub unsafe fn _andn_u64(a: u64, b: u64) -> u64 { !a & b } -/// Extract lowest set isolated bit. +/// Extracts lowest set isolated bit. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_blsi_u64) #[inline] @@ -64,7 +64,7 @@ pub unsafe fn _blsi_u64(x: u64) -> u64 { x & x.wrapping_neg() } -/// Get mask up to lowest set bit. +/// Gets mask up to lowest set bit. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_blsmsk_u64) #[inline] diff --git a/library/stdarch/crates/core_arch/src/x86_64/bmi2.rs b/library/stdarch/crates/core_arch/src/x86_64/bmi2.rs index 5e0ab83f681..4b1d1931360 100644 --- a/library/stdarch/crates/core_arch/src/x86_64/bmi2.rs +++ b/library/stdarch/crates/core_arch/src/x86_64/bmi2.rs @@ -30,7 +30,7 @@ pub unsafe fn _mulx_u64(a: u64, b: u64, hi: &mut u64) -> u64 { result as u64 } -/// Zero higher bits of `a` >= `index`. +/// Zeroes higher bits of `a` >= `index`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_bzhi_u64) #[inline] diff --git a/library/stdarch/crates/core_arch/src/x86_64/bswap.rs b/library/stdarch/crates/core_arch/src/x86_64/bswap.rs index ba121d67006..6a94a42fc60 100644 --- a/library/stdarch/crates/core_arch/src/x86_64/bswap.rs +++ b/library/stdarch/crates/core_arch/src/x86_64/bswap.rs @@ -5,7 +5,7 @@ #[cfg(test)] use stdsimd_test::assert_instr; -/// Return an integer with the reversed byte order of x +/// Returns an integer with the reversed byte order of x /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_bswap64) #[inline] diff --git a/library/stdarch/crates/core_arch/src/x86_64/cmpxchg16b.rs b/library/stdarch/crates/core_arch/src/x86_64/cmpxchg16b.rs index 822bfc2fb3d..c48ed7525ba 100644 --- a/library/stdarch/crates/core_arch/src/x86_64/cmpxchg16b.rs +++ b/library/stdarch/crates/core_arch/src/x86_64/cmpxchg16b.rs @@ -3,7 +3,7 @@ use sync::atomic::Ordering; #[cfg(test)] use stdsimd_test::assert_instr; -/// Compare and exchange 16 bytes (128 bits) of data atomically. +/// Compares and exchange 16 bytes (128 bits) of data atomically. /// /// This intrinsic corresponds to the `cmpxchg16b` instruction on `x86_64` /// processors. 
It performs an atomic compare-and-swap, updating the `ptr` diff --git a/library/stdarch/crates/core_arch/src/x86_64/rdrand.rs b/library/stdarch/crates/core_arch/src/x86_64/rdrand.rs index aef4f638ebe..40b781d4062 100644 --- a/library/stdarch/crates/core_arch/src/x86_64/rdrand.rs +++ b/library/stdarch/crates/core_arch/src/x86_64/rdrand.rs @@ -16,7 +16,7 @@ extern "unadjusted" { use stdsimd_test::assert_instr; /// Read a hardware generated 64-bit random value and store the result in val. -/// Return 1 if a random value was generated, and 0 otherwise. +/// Returns 1 if a random value was generated, and 0 otherwise. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_rdrand64_step) #[inline] diff --git a/library/stdarch/crates/core_arch/src/x86_64/sse.rs b/library/stdarch/crates/core_arch/src/x86_64/sse.rs index a3126e72e94..765bd7b9467 100644 --- a/library/stdarch/crates/core_arch/src/x86_64/sse.rs +++ b/library/stdarch/crates/core_arch/src/x86_64/sse.rs @@ -15,7 +15,7 @@ extern "C" { fn cvtsi642ss(a: __m128, b: i64) -> __m128; } -/// Convert the lowest 32 bit float in the input vector to a 64 bit integer. +/// Converts the lowest 32 bit float in the input vector to a 64 bit integer. /// /// The result is rounded according to the current rounding mode. If the result /// cannot be represented as a 64 bit integer the result will be @@ -34,7 +34,7 @@ pub unsafe fn _mm_cvtss_si64(a: __m128) -> i64 { cvtss2si64(a) } -/// Convert the lowest 32 bit float in the input vector to a 64 bit integer +/// Converts the lowest 32 bit float in the input vector to a 64 bit integer /// with truncation. /// /// The result is rounded always using truncation (round towards zero). If the @@ -53,7 +53,7 @@ pub unsafe fn _mm_cvttss_si64(a: __m128) -> i64 { cvttss2si64(a) } -/// Convert a 64 bit integer to a 32 bit float. The result vector is the input +/// Converts a 64 bit integer to a 32 bit float. The result vector is the input /// vector `a` with the lowest 32 bit float replaced by the converted integer. /// /// This intrinsic corresponds to the `CVTSI2SS` instruction (with 64 bit diff --git a/library/stdarch/crates/core_arch/src/x86_64/sse2.rs b/library/stdarch/crates/core_arch/src/x86_64/sse2.rs index 779be0a5930..570a75236ae 100644 --- a/library/stdarch/crates/core_arch/src/x86_64/sse2.rs +++ b/library/stdarch/crates/core_arch/src/x86_64/sse2.rs @@ -15,7 +15,7 @@ extern "C" { fn cvttsd2si64(a: __m128d) -> i64; } -/// Convert the lower double-precision (64-bit) floating-point element in a to +/// Converts the lower double-precision (64-bit) floating-point element in a to /// a 64-bit integer. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsd_si64) @@ -38,7 +38,7 @@ pub unsafe fn _mm_cvtsd_si64x(a: __m128d) -> i64 { _mm_cvtsd_si64(a) } -/// Convert the lower double-precision (64-bit) floating-point element in `a` +/// Converts the lower double-precision (64-bit) floating-point element in `a` /// to a 64-bit integer with truncation. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsd_si64) @@ -74,7 +74,7 @@ pub unsafe fn _mm_stream_si64(mem_addr: *mut i64, a: i64) { intrinsics::nontemporal_store(mem_addr, a); } -/// Return a vector whose lowest element is `a` and all higher elements are +/// Returns a vector whose lowest element is `a` and all higher elements are /// `0`. 
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi64_si128) @@ -86,7 +86,7 @@ pub unsafe fn _mm_cvtsi64_si128(a: i64) -> __m128i { _mm_set_epi64x(0, a) } -/// Return a vector whose lowest element is `a` and all higher elements are +/// Returns a vector whose lowest element is `a` and all higher elements are /// `0`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi64x_si128) @@ -98,7 +98,7 @@ pub unsafe fn _mm_cvtsi64x_si128(a: i64) -> __m128i { _mm_cvtsi64_si128(a) } -/// Return the lowest element of `a`. +/// Returns the lowest element of `a`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi128_si64) #[inline] @@ -109,7 +109,7 @@ pub unsafe fn _mm_cvtsi128_si64(a: __m128i) -> i64 { simd_extract(a.as_i64x2(), 0) } -/// Return the lowest element of `a`. +/// Returns the lowest element of `a`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi128_si64x) #[inline] @@ -120,7 +120,7 @@ pub unsafe fn _mm_cvtsi128_si64x(a: __m128i) -> i64 { _mm_cvtsi128_si64(a) } -/// Return `a` with its lower element replaced by `b` after converting it to +/// Returns `a` with its lower element replaced by `b` after converting it to /// an `f64`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi64_sd) @@ -132,7 +132,7 @@ pub unsafe fn _mm_cvtsi64_sd(a: __m128d, b: i64) -> __m128d { simd_insert(a, 0, b as f64) } -/// Return `a` with its lower element replaced by `b` after converting it to +/// Returns `a` with its lower element replaced by `b` after converting it to /// an `f64`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi64x_sd) diff --git a/library/stdarch/crates/core_arch/src/x86_64/sse41.rs b/library/stdarch/crates/core_arch/src/x86_64/sse41.rs index 9a22370019c..cbad2609cd1 100644 --- a/library/stdarch/crates/core_arch/src/x86_64/sse41.rs +++ b/library/stdarch/crates/core_arch/src/x86_64/sse41.rs @@ -7,7 +7,7 @@ use mem; #[cfg(test)] use stdsimd_test::assert_instr; -/// Extract an 64-bit integer from `a` selected with `imm8` +/// Extracts an 64-bit integer from `a` selected with `imm8` /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_extract_epi64) #[inline] @@ -20,7 +20,7 @@ pub unsafe fn _mm_extract_epi64(a: __m128i, imm8: i32) -> i64 { simd_extract(a.as_i64x2(), imm8) } -/// Return a copy of `a` with the 64-bit integer from `i` inserted at a +/// Returns a copy of `a` with the 64-bit integer from `i` inserted at a /// location specified by `imm8`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_insert_epi64) diff --git a/library/stdarch/crates/core_arch/src/x86_64/xsave.rs b/library/stdarch/crates/core_arch/src/x86_64/xsave.rs index 7531ac5832e..50f1f5481b3 100644 --- a/library/stdarch/crates/core_arch/src/x86_64/xsave.rs +++ b/library/stdarch/crates/core_arch/src/x86_64/xsave.rs @@ -21,7 +21,7 @@ extern "C" { fn xrstors64(p: *const u8, hi: u32, lo: u32) -> (); } -/// Perform a full or partial save of the enabled processor states to memory at +/// Performs a full or partial save of the enabled processor states to memory at /// `mem_addr`. /// /// State is saved based on bits `[62:0]` in `save_mask` and XCR0. 
@@ -39,7 +39,7 @@ pub unsafe fn _xsave64(mem_addr: *mut u8, save_mask: u64) { xsave64(mem_addr, (save_mask >> 32) as u32, save_mask as u32); } -/// Perform a full or partial restore of the enabled processor states using +/// Performs a full or partial restore of the enabled processor states using /// the state information stored in memory at `mem_addr`. /// /// State is restored based on bits `[62:0]` in `rs_mask`, `XCR0`, and @@ -55,7 +55,7 @@ pub unsafe fn _xrstor64(mem_addr: *const u8, rs_mask: u64) { xrstor64(mem_addr, (rs_mask >> 32) as u32, rs_mask as u32); } -/// Perform a full or partial save of the enabled processor states to memory at +/// Performs a full or partial save of the enabled processor states to memory at /// `mem_addr`. /// /// State is saved based on bits `[62:0]` in `save_mask` and `XCR0`. @@ -72,7 +72,7 @@ pub unsafe fn _xsaveopt64(mem_addr: *mut u8, save_mask: u64) { xsaveopt64(mem_addr, (save_mask >> 32) as u32, save_mask as u32); } -/// Perform a full or partial save of the enabled processor states to memory +/// Performs a full or partial save of the enabled processor states to memory /// at `mem_addr`. /// /// `xsavec` differs from `xsave` in that it uses compaction and that it may @@ -88,7 +88,7 @@ pub unsafe fn _xsavec64(mem_addr: *mut u8, save_mask: u64) { xsavec64(mem_addr, (save_mask >> 32) as u32, save_mask as u32); } -/// Perform a full or partial save of the enabled processor states to memory at +/// Performs a full or partial save of the enabled processor states to memory at /// `mem_addr` /// /// `xsaves` differs from xsave in that it can save state components @@ -105,7 +105,7 @@ pub unsafe fn _xsaves64(mem_addr: *mut u8, save_mask: u64) { xsaves64(mem_addr, (save_mask >> 32) as u32, save_mask as u32); } -/// Perform a full or partial restore of the enabled processor states using the +/// Performs a full or partial restore of the enabled processor states using the /// state information stored in memory at `mem_addr`. /// /// `xrstors` differs from `xrstor` in that it can restore state components diff --git a/library/stdarch/crates/std_detect/src/detect/arch/x86.rs b/library/stdarch/crates/std_detect/src/detect/arch/x86.rs index 6a3e11de3df..580d7111bdd 100644 --- a/library/stdarch/crates/std_detect/src/detect/arch/x86.rs +++ b/library/stdarch/crates/std_detect/src/detect/arch/x86.rs @@ -6,9 +6,8 @@ //! whether a feature is available (bit is set) or unavaiable (bit is cleared). //! //! The enum `Feature` is used to map bit positions to feature names, and the -//! the `__crate::detect::check_for!` macro is used to map string literals (e.g. -//! "avx") to these bit positions (e.g. `Feature::avx`). -//! +//! the `__crate::detect::check_for!` macro is used to map string literals (e.g., +//! "avx") to these bit positions (e.g., `Feature::avx`). //! //! The run-time feature detection is performed by the //! `__crate::detect::check_for(Feature) -> bool` function. On its first call, @@ -20,7 +19,7 @@ /// x86/x86-64 platforms. /// /// This macro is provided in the standard library and will detect at runtime -/// whether the specified CPU feature is detected. This does *not* resolve at +/// whether the specified CPU feature is detected. This does **not** resolve at /// compile time unless the specified feature is already enabled for the entire /// crate. Runtime detection currently relies mostly on the `cpuid` instruction. 
/// diff --git a/library/stdarch/crates/std_detect/src/detect/cache.rs b/library/stdarch/crates/std_detect/src/detect/cache.rs index ee1914875cf..ac1f3e4fae7 100644 --- a/library/stdarch/crates/std_detect/src/detect/cache.rs +++ b/library/stdarch/crates/std_detect/src/detect/cache.rs @@ -147,7 +147,7 @@ impl Cache { } } -/// Test the `bit` of the storage. If the storage has not been initialized, +/// Tests the `bit` of the storage. If the storage has not been initialized, /// initializes it with the result of `f()`. /// /// On its first invocation, it detects the CPU features and caches them in the diff --git a/library/stdarch/crates/std_detect/src/detect/os/linux/auxvec.rs b/library/stdarch/crates/std_detect/src/detect/os/linux/auxvec.rs index 6ccdbbc88a8..5d935a26c89 100644 --- a/library/stdarch/crates/std_detect/src/detect/os/linux/auxvec.rs +++ b/library/stdarch/crates/std_detect/src/detect/os/linux/auxvec.rs @@ -134,7 +134,7 @@ fn getauxval(key: usize) -> Result { fn auxv_from_file(file: &str) -> Result { let mut file = File::open(file).map_err(|_| ())?; - // See https://github.com/torvalds/linux/blob/v3.19/include/uapi/linux/auxvec.h + // See <https://github.com/torvalds/linux/blob/v3.19/include/uapi/linux/auxvec.h>. // // The auxiliary vector contains at most 32 (key,value) fields: from // `AT_EXECFN = 31` to `AT_NULL = 0`. That is, a buffer of diff --git a/library/stdarch/crates/stdsimd-test/src/lib.rs b/library/stdarch/crates/stdsimd-test/src/lib.rs index dec44401d9b..5c9b27f669a 100644 --- a/library/stdarch/crates/stdsimd-test/src/lib.rs +++ b/library/stdarch/crates/stdsimd-test/src/lib.rs @@ -23,7 +23,7 @@ pub use assert_instr_macro::*; pub use simd_test_macro::*; use std::{collections::HashMap, env, str}; -// println! doesn't work on wasm32 right now, so shadow the compiler's println! +// `println!` doesn't work on wasm32 right now, so shadow the compiler's `println!` // macro with our own shim that redirects to `console.log`. #[allow(unused)] #[cfg(target_arch = "wasm32")] @@ -64,7 +64,7 @@ fn normalize(symbol: &str) -> String { None => symbol.to_string(), }; // Normalize to no leading underscore to handle platforms that may - // inject extra ones in symbol names + // inject extra ones in symbol names. while ret.starts_with("_") { ret.remove(0); } @@ -90,9 +90,9 @@ pub fn assert(fnptr: usize, fnname: &str, expected: &str) { // function, returning if we do indeed find it. let mut found = false; for instr in instrs { - // Gets the first instruction, e.g. tzcntl in tzcntl %rax,%rax + // Get the first instruction, e.g., tzcntl in tzcntl %rax,%rax. if let Some(part) = instr.parts.get(0) { - // Truncates the instruction with the length of the expected + // Truncate the instruction with the length of the expected // instruction: tzcntl => tzcnt and compares that. if part.starts_with(expected) { found = true; @@ -136,17 +136,15 @@ pub fn assert(fnptr: usize, fnname: &str, expected: &str) { .ok() .map_or_else( || match expected { - // cpuid returns a pretty big aggregate structure so exempt it - // from the slightly more restrictive 22 - // instructions below + // `cpuid` returns a pretty big aggregate structure, so exempt + // it from the slightly more restrictive 22 instructions below. "cpuid" => 30, - // Apparently on Windows LLVM generates a bunch of - // saves/restores of xmm registers around these - // intstructions which blows the 20 limit - // below. As it seems dictates by Windows's abi - // (I guess?) we probably can't do much - // about it...
+ // Apparently, on Windows, LLVM generates a bunch of + // saves/restores of xmm registers around these intstructions, + // which exceeds the limit of 20 below. As it seems dictated by + // Windows's ABI (I believe?), we probably can't do much + // about it. "vzeroall" | "vzeroupper" if cfg!(windows) => 30, // Intrinsics using `cvtpi2ps` are typically "composites" and @@ -154,9 +152,8 @@ pub fn assert(fnptr: usize, fnname: &str, expected: &str) { "cvtpi2ps" => 25, // Original limit was 20 instructions, but ARM DSP Intrinsics - // are exactly 20 instructions long. So bump - // the limit to 22 instead of adding here a - // long list of exceptions. + // are exactly 20 instructions long. So, bump the limit to 22 + // instead of adding here a long list of exceptions. _ => 22, }, |v| v.parse().unwrap(), -- cgit 1.4.1-3-g733a5 From b322f1c03aa5de204317163322b5e22a96078d0d Mon Sep 17 00:00:00 2001 From: Alexander Regueiro Date: Mon, 18 Feb 2019 15:36:32 +0000 Subject: A few cosmetic improvements. --- library/stdarch/crates/core_arch/src/x86/bmi1.rs | 6 +++--- library/stdarch/crates/core_arch/src/x86/cpuid.rs | 2 +- library/stdarch/crates/core_arch/src/x86/sse.rs | 12 ++++++------ library/stdarch/crates/core_arch/src/x86/tbm.rs | 4 ++-- library/stdarch/crates/core_arch/src/x86_64/bmi.rs | 6 +++--- .../stdarch/crates/std_detect/src/detect/os/linux/mips.rs | 4 ++-- library/stdarch/crates/stdsimd-verify/tests/arm.rs | 4 ++-- 7 files changed, 19 insertions(+), 19 deletions(-) (limited to 'library/stdarch/crates/std_detect/src') diff --git a/library/stdarch/crates/core_arch/src/x86/bmi1.rs b/library/stdarch/crates/core_arch/src/x86/bmi1.rs index 47a62d1f64b..1e37496a441 100644 --- a/library/stdarch/crates/core_arch/src/x86/bmi1.rs +++ b/library/stdarch/crates/core_arch/src/x86/bmi1.rs @@ -87,7 +87,7 @@ pub unsafe fn _blsr_u32(x: u32) -> u32 { /// Counts the number of trailing least significant zero bits. /// -/// When the source operand is 0, it returns its size in bits. +/// When the source operand is `0`, it returns its size in bits. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_tzcnt_u32) #[inline] @@ -100,7 +100,7 @@ pub unsafe fn _tzcnt_u32(x: u32) -> u32 { /// Counts the number of trailing least significant zero bits. /// -/// When the source operand is 0, it returns its size in bits. +/// When the source operand is `0`, it returns its size in bits. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_tzcnt_32) #[inline] @@ -164,7 +164,7 @@ mod tests { #[simd_test(enable = "bmi1")] unsafe fn test_blsr_u32() { - // TODO: test the behavior when the input is 0 + // TODO: test the behavior when the input is `0`. let r = _blsr_u32(0b0011_0000u32); assert_eq!(r, 0b0010_0000u32); } diff --git a/library/stdarch/crates/core_arch/src/x86/cpuid.rs b/library/stdarch/crates/core_arch/src/x86/cpuid.rs index d796995ad38..4b639619f28 100644 --- a/library/stdarch/crates/core_arch/src/x86/cpuid.rs +++ b/library/stdarch/crates/core_arch/src/x86/cpuid.rs @@ -148,7 +148,7 @@ pub fn has_cpuid() -> bool { // was modified or not. // // If the result is zero, the cpuid bit was not modified. 
- // If the result is 0x200000 (non-zero), then the cpuid + // If the result is `0x200000` (non-zero), then the cpuid // was correctly modified and the CPU supports the cpuid // instruction: (result & 0x200000) != 0 diff --git a/library/stdarch/crates/core_arch/src/x86/sse.rs b/library/stdarch/crates/core_arch/src/x86/sse.rs index 96ea08697c0..8e2053da5e4 100644 --- a/library/stdarch/crates/core_arch/src/x86/sse.rs +++ b/library/stdarch/crates/core_arch/src/x86/sse.rs @@ -1126,8 +1126,8 @@ pub unsafe fn _mm_movemask_ps(a: __m128) -> i32 { all(test, target_arch = "x86", not(target_feature = "sse2")), assert_instr(shufps) )] -// TODO: This function is actually not limited to floats, but that's what -// what matches the C type most closely: (__m128, *const __m64) -> __m128 +// TODO: this function is actually not limited to floats, but that's what +// what matches the C type most closely: `(__m128, *const __m64) -> __m128`. pub unsafe fn _mm_loadh_pi(a: __m128, p: *const __m64) -> __m128 { let q = p as *const f32x2; let b: f32x2 = *q; @@ -2832,7 +2832,7 @@ mod tests { #[simd_test(enable = "sse")] unsafe fn test_mm_cmpnlt_ss() { - // TODO: This test is exactly the same as for _mm_cmpge_ss, but there + // TODO: this test is exactly the same as for `_mm_cmpge_ss`, but there // must be a difference. It may have to do with behavior in the // presence of NaNs (signaling or quiet). If so, we should add tests // for those. @@ -2861,7 +2861,7 @@ mod tests { #[simd_test(enable = "sse")] unsafe fn test_mm_cmpnle_ss() { - // TODO: This test is exactly the same as for _mm_cmpgt_ss, but there + // TODO: this test is exactly the same as for `_mm_cmpgt_ss`, but there // must be a difference. It may have to do with behavior in the // presence // of NaNs (signaling or quiet). If so, we should add tests for those. @@ -2890,7 +2890,7 @@ mod tests { #[simd_test(enable = "sse")] unsafe fn test_mm_cmpngt_ss() { - // TODO: This test is exactly the same as for _mm_cmple_ss, but there + // TODO: this test is exactly the same as for `_mm_cmple_ss`, but there // must be a difference. It may have to do with behavior in the // presence of NaNs (signaling or quiet). If so, we should add tests // for those. @@ -2919,7 +2919,7 @@ mod tests { #[simd_test(enable = "sse")] unsafe fn test_mm_cmpnge_ss() { - // TODO: This test is exactly the same as for _mm_cmplt_ss, but there + // TODO: this test is exactly the same as for `_mm_cmplt_ss`, but there // must be a difference. It may have to do with behavior in the // presence of NaNs (signaling or quiet). If so, we should add tests // for those. diff --git a/library/stdarch/crates/core_arch/src/x86/tbm.rs b/library/stdarch/crates/core_arch/src/x86/tbm.rs index 314c5e36c7f..840974df9c2 100644 --- a/library/stdarch/crates/core_arch/src/x86/tbm.rs +++ b/library/stdarch/crates/core_arch/src/x86/tbm.rs @@ -230,7 +230,7 @@ pub unsafe fn _blsic_u64(x: u64) -> u64 { /// Clears all bits below the least significant zero of `x` and sets all other /// bits. /// -/// If the least significant bit of `x` is 0, it sets all bits. +/// If the least significant bit of `x` is `0`, it sets all bits. #[inline] #[target_feature(enable = "tbm")] #[cfg_attr(test, assert_instr(t1mskc))] @@ -242,7 +242,7 @@ pub unsafe fn _t1mskc_u32(x: u32) -> u32 { /// Clears all bits below the least significant zero of `x` and sets all other /// bits. /// -/// If the least significant bit of `x` is 0, it sets all bits. +/// If the least significant bit of `x` is `0`, it sets all bits. 
#[inline] #[target_feature(enable = "tbm")] #[cfg_attr(test, assert_instr(t1mskc))] diff --git a/library/stdarch/crates/core_arch/src/x86_64/bmi.rs b/library/stdarch/crates/core_arch/src/x86_64/bmi.rs index dc86cb0cb22..c8c0f31d80f 100644 --- a/library/stdarch/crates/core_arch/src/x86_64/bmi.rs +++ b/library/stdarch/crates/core_arch/src/x86_64/bmi.rs @@ -92,7 +92,7 @@ pub unsafe fn _blsr_u64(x: u64) -> u64 { /// Counts the number of trailing least significant zero bits. /// -/// When the source operand is 0, it returns its size in bits. +/// When the source operand is `0`, it returns its size in bits. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_tzcnt_u64) #[inline] @@ -105,7 +105,7 @@ pub unsafe fn _tzcnt_u64(x: u64) -> u64 { /// Counts the number of trailing least significant zero bits. /// -/// When the source operand is 0, it returns its size in bits. +/// When the source operand is `0`, it returns its size in bits. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_tzcnt_64) #[inline] @@ -170,7 +170,7 @@ mod tests { #[simd_test(enable = "bmi1")] unsafe fn test_blsr_u64() { - // TODO: test the behavior when the input is 0 + // TODO: test the behavior when the input is `0`. let r = _blsr_u64(0b0011_0000u64); assert_eq!(r, 0b0010_0000u64); } diff --git a/library/stdarch/crates/std_detect/src/detect/os/linux/mips.rs b/library/stdarch/crates/std_detect/src/detect/os/linux/mips.rs index 7c180326feb..c0a5fb2e5d8 100644 --- a/library/stdarch/crates/std_detect/src/detect/os/linux/mips.rs +++ b/library/stdarch/crates/std_detect/src/detect/os/linux/mips.rs @@ -10,7 +10,7 @@ pub fn check_for(x: Feature) -> bool { } /// Try to read the features from the auxiliary vector, and if that fails, try -/// to read them from /proc/cpuinfo. +/// to read them from `/proc/cpuinfo`. fn detect_features() -> cache::Initializer { let mut value = cache::Initializer::default(); let enable_feature = |value: &mut cache::Initializer, f, enable| { @@ -26,6 +26,6 @@ fn detect_features() -> cache::Initializer { enable_feature(&mut value, Feature::msa, bit::test(auxv.hwcap, 1)); return value; } - // TODO: fall back via cpuinfo + // TODO: fall back via `cpuinfo`. value } diff --git a/library/stdarch/crates/stdsimd-verify/tests/arm.rs b/library/stdarch/crates/stdsimd-verify/tests/arm.rs index 32f64304ec7..1b418f3442e 100644 --- a/library/stdarch/crates/stdsimd-verify/tests/arm.rs +++ b/library/stdarch/crates/stdsimd-verify/tests/arm.rs @@ -206,7 +206,7 @@ fn verify_all_signatures() { Some(i) => i, None => { // Skip all these intrinsics as they're not listed in NEON - // descriptions online + // descriptions online. // // TODO: we still need to verify these intrinsics or find a // reference for them, need to figure out where though! @@ -290,7 +290,7 @@ fn matches(rust: &Function, arm: &Intrinsic) -> Result<(), String> { } } - // TODO: verify `target_feature` + // TODO: verify `target_feature`. 
Ok(()) } -- cgit 1.4.1-3-g733a5 From 61414fdd629426386fa498c4339fc99a6ed45351 Mon Sep 17 00:00:00 2001 From: Taiki Endo Date: Tue, 19 Feb 2019 23:16:07 +0900 Subject: Change imports in std_detect to edition-agnostic style --- library/stdarch/crates/std_detect/src/detect/cache.rs | 6 +++--- library/stdarch/crates/std_detect/src/detect/os/freebsd/mod.rs | 2 +- library/stdarch/crates/std_detect/src/detect/os/linux/auxvec.rs | 2 +- library/stdarch/crates/std_detect/src/detect/os/x86.rs | 6 +++--- 4 files changed, 8 insertions(+), 8 deletions(-) (limited to 'library/stdarch/crates/std_detect/src') diff --git a/library/stdarch/crates/std_detect/src/detect/cache.rs b/library/stdarch/crates/std_detect/src/detect/cache.rs index ac1f3e4fae7..422bd6817da 100644 --- a/library/stdarch/crates/std_detect/src/detect/cache.rs +++ b/library/stdarch/crates/std_detect/src/detect/cache.rs @@ -3,13 +3,13 @@ #![allow(dead_code)] // not used on all platforms -use sync::atomic::Ordering; +use crate::sync::atomic::Ordering; #[cfg(target_pointer_width = "64")] -use sync::atomic::AtomicU64; +use crate::sync::atomic::AtomicU64; #[cfg(target_pointer_width = "32")] -use sync::atomic::AtomicU32; +use crate::sync::atomic::AtomicU32; /// Sets the `bit` of `x`. #[inline] diff --git a/library/stdarch/crates/std_detect/src/detect/os/freebsd/mod.rs b/library/stdarch/crates/std_detect/src/detect/os/freebsd/mod.rs index 1c73cefd47d..952b4baf0fd 100644 --- a/library/stdarch/crates/std_detect/src/detect/os/freebsd/mod.rs +++ b/library/stdarch/crates/std_detect/src/detect/os/freebsd/mod.rs @@ -5,7 +5,7 @@ cfg_if! { mod aarch64; pub use self::aarch64::check_for; } else { - use arch::detect::Feature; + use crate::arch::detect::Feature; /// Performs run-time feature detection. pub fn check_for(_x: Feature) -> bool { false diff --git a/library/stdarch/crates/std_detect/src/detect/os/linux/auxvec.rs b/library/stdarch/crates/std_detect/src/detect/os/linux/auxvec.rs index 5d935a26c89..07b6432eafd 100644 --- a/library/stdarch/crates/std_detect/src/detect/os/linux/auxvec.rs +++ b/library/stdarch/crates/std_detect/src/detect/os/linux/auxvec.rs @@ -2,7 +2,7 @@ #![cfg_attr(not(target_arch = "aarch64"), allow(dead_code))] #[cfg(feature = "std_detect_file_io")] -use ::{fs::File, io::Read}; +use crate::{fs::File, io::Read}; /// Key to access the CPU Hardware capabilities bitfield. pub(crate) const AT_HWCAP: usize = 16; diff --git a/library/stdarch/crates/std_detect/src/detect/os/x86.rs b/library/stdarch/crates/std_detect/src/detect/os/x86.rs index 30199b1f44f..675601536fa 100644 --- a/library/stdarch/crates/std_detect/src/detect/os/x86.rs +++ b/library/stdarch/crates/std_detect/src/detect/os/x86.rs @@ -1,11 +1,11 @@ //! x86 run-time feature detection is OS independent. 
#[cfg(target_arch = "x86")] -use arch::x86::*; +use crate::arch::x86::*; #[cfg(target_arch = "x86_64")] -use arch::x86_64::*; +use crate::arch::x86_64::*; -use mem; +use crate::mem; use crate::detect::{Feature, cache, bit}; -- cgit 1.4.1-3-g733a5 From 5238498dbdabdda0e73aa4ab7abfab04022acee9 Mon Sep 17 00:00:00 2001 From: myfreeweb Date: Wed, 27 Feb 2019 21:34:33 +0300 Subject: aarch64: escape square brackets in docs To comply with deny(intra_doc_link_resolution_failure) --- library/stdarch/crates/std_detect/src/detect/os/aarch64.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'library/stdarch/crates/std_detect/src') diff --git a/library/stdarch/crates/std_detect/src/detect/os/aarch64.rs b/library/stdarch/crates/std_detect/src/detect/os/aarch64.rs index f28d15a7c3e..dfb8c87707f 100644 --- a/library/stdarch/crates/std_detect/src/detect/os/aarch64.rs +++ b/library/stdarch/crates/std_detect/src/detect/os/aarch64.rs @@ -6,7 +6,7 @@ //! AArch64 system registers ID_AA64ISAR0_EL1, ID_AA64PFR0_EL1, ID_AA64ISAR1_EL1 //! have bits dedicated to features like AdvSIMD, CRC32, AES, atomics (LSE), etc. //! Each part of the register indicates the level of support for a certain feature, e.g. -//! when ID_AA64ISAR0_EL1[7:4] is >= 1, AES is supported; when it's >= 2, PMULL is supported. +//! when ID_AA64ISAR0_EL1\[7:4\] is >= 1, AES is supported; when it's >= 2, PMULL is supported. //! //! For proper support of [SoCs where different cores have different capabilities](https://medium.com/@jadr2ddude/a-big-little-problem-a-tale-of-big-little-gone-wrong-e7778ce744bb), //! the OS has to always report only the features supported by all cores, like [FreeBSD does](https://reviews.freebsd.org/D17137#393947). -- cgit 1.4.1-3-g733a5 From eac0c6c8c1c4009fbc37bb54932e6240de005cb5 Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Tue, 16 Apr 2019 10:50:30 +0200 Subject: Migrate clippy lints to tool lints --- library/stdarch/crates/core_arch/src/lib.rs | 33 +++++++++++--------------- library/stdarch/crates/std_detect/src/lib.rs | 7 ++---- library/stdarch/crates/stdsimd-test/src/lib.rs | 6 +---- 3 files changed, 17 insertions(+), 29 deletions(-) (limited to 'library/stdarch/crates/std_detect/src') diff --git a/library/stdarch/crates/core_arch/src/lib.rs b/library/stdarch/crates/core_arch/src/lib.rs index fb621808b60..ea9a6206728 100644 --- a/library/stdarch/crates/core_arch/src/lib.rs +++ b/library/stdarch/crates/core_arch/src/lib.rs @@ -35,25 +35,20 @@ external_doc )] #![cfg_attr(test, feature(test, abi_vectorcall, untagged_unions))] -#![cfg_attr( - feature = "cargo-clippy", - deny(clippy::missing_inline_in_public_items,) -)] -#![cfg_attr( - feature = "cargo-clippy", - allow( - clippy::inline_always, - clippy::too_many_arguments, - clippy::cast_sign_loss, - clippy::cast_lossless, - clippy::cast_possible_wrap, - clippy::cast_possible_truncation, - clippy::cast_precision_loss, - clippy::shadow_reuse, - clippy::cyclomatic_complexity, - clippy::similar_names, - clippy::many_single_char_names - ) +#![deny(clippy::missing_inline_in_public_items)] +#![allow( + clippy::inline_always, + clippy::too_many_arguments, + clippy::cast_sign_loss, + clippy::cast_lossless, + clippy::cast_possible_wrap, + clippy::cast_possible_truncation, + clippy::cast_precision_loss, + clippy::shadow_reuse, + clippy::cyclomatic_complexity, + clippy::cognitive_complexity, + clippy::similar_names, + clippy::many_single_char_names )] #![cfg_attr(test, allow(unused_imports))] #![no_std] diff --git a/library/stdarch/crates/std_detect/src/lib.rs 
b/library/stdarch/crates/std_detect/src/lib.rs index da54cc73592..8b3bb304f1c 100644 --- a/library/stdarch/crates/std_detect/src/lib.rs +++ b/library/stdarch/crates/std_detect/src/lib.rs @@ -13,11 +13,8 @@ #![unstable(feature = "stdsimd", issue = "27731")] #![feature(const_fn, staged_api, stdsimd, doc_cfg, allow_internal_unstable)] -#![cfg_attr(feature = "cargo-clippy", allow(clippy::shadow_reuse))] -#![cfg_attr( - feature = "cargo-clippy", - deny(clippy::missing_inline_in_public_items,) -)] +#![allow(clippy::shadow_reuse)] +#![deny(clippy::missing_inline_in_public_items)] #![cfg_attr(target_os = "linux", feature(linkage))] #![cfg_attr(all(target_os = "freebsd", target_arch = "aarch64"), feature(asm))] #![cfg_attr(stdsimd_strict, deny(warnings))] diff --git a/library/stdarch/crates/stdsimd-test/src/lib.rs b/library/stdarch/crates/stdsimd-test/src/lib.rs index af1a9a28069..d20d3811ff2 100644 --- a/library/stdarch/crates/stdsimd-test/src/lib.rs +++ b/library/stdarch/crates/stdsimd-test/src/lib.rs @@ -3,11 +3,7 @@ //! This basically just disassembles the current executable and then parses the //! output once globally and then provides the `assert` function which makes //! assertions about the disassembly of a function. - -#![cfg_attr( - feature = "cargo-clippy", - allow(clippy::missing_docs_in_private_items, clippy::print_stdout) -)] +#![allow(clippy::missing_docs_in_private_items, clippy::print_stdout)] extern crate assert_instr_macro; extern crate backtrace; -- cgit 1.4.1-3-g733a5 From 459afa7a592b6bc140836d0c426f854d63887c27 Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Tue, 16 Apr 2019 11:01:19 +0200 Subject: Fix clippy issues --- .../stdarch/crates/assert-instr-macro/src/lib.rs | 2 +- library/stdarch/crates/core_arch/src/lib.rs | 1 - library/stdarch/crates/core_arch/src/macros.rs | 3 +-- library/stdarch/crates/core_arch/src/simd.rs | 4 ++-- library/stdarch/crates/core_arch/src/x86/adx.rs | 3 +-- library/stdarch/crates/core_arch/src/x86/avx.rs | 18 +++++++++--------- library/stdarch/crates/core_arch/src/x86/bswap.rs | 3 +-- library/stdarch/crates/core_arch/src/x86/cpuid.rs | 10 +++------- library/stdarch/crates/core_arch/src/x86/rdrand.rs | 3 +-- library/stdarch/crates/core_arch/src/x86/sse.rs | 10 +++++----- library/stdarch/crates/core_arch/src/x86/sse2.rs | 12 ++++++------ library/stdarch/crates/core_arch/src/x86/xsave.rs | 3 +-- .../stdarch/crates/core_arch/src/x86_64/bswap.rs | 2 +- .../stdarch/crates/core_arch/src/x86_64/rdrand.rs | 2 +- .../stdarch/crates/core_arch/src/x86_64/xsave.rs | 2 +- .../stdarch/crates/core_arch/tests/cpu-detection.rs | 5 +---- .../stdarch/crates/std_detect/src/detect/cache.rs | 9 +++------ .../stdarch/crates/std_detect/src/detect/os/x86.rs | 2 +- .../crates/std_detect/tests/cpu-detection.rs | 5 +---- .../std_detect/tests/macro_trailing_commas.rs | 5 +---- .../stdarch/crates/stdsimd-test/src/disassembly.rs | 6 +++--- library/stdarch/crates/stdsimd-test/src/lib.rs | 4 ++-- library/stdarch/crates/stdsimd-verify/src/lib.rs | 12 ++++++------ library/stdarch/examples/hex.rs | 21 +++++++++------------ 24 files changed, 61 insertions(+), 86 deletions(-) (limited to 'library/stdarch/crates/std_detect/src') diff --git a/library/stdarch/crates/assert-instr-macro/src/lib.rs b/library/stdarch/crates/assert-instr-macro/src/lib.rs index 4525398b759..ba33dcfda92 100644 --- a/library/stdarch/crates/assert-instr-macro/src/lib.rs +++ b/library/stdarch/crates/assert-instr-macro/src/lib.rs @@ -47,7 +47,7 @@ pub fn assert_instr( .replace('.', "_") .replace('/', "_") .replace(':', 
"_") - .replace(|c: char| c.is_whitespace(), ""); + .replace(char::is_whitespace, ""); let assert_name = syn::Ident::new(&format!("assert_{}_{}", name, instr_str), name.span()); let shim_name = syn::Ident::new(&format!("{}_shim_{}", name, instr_str), name.span()); let mut inputs = Vec::new(); diff --git a/library/stdarch/crates/core_arch/src/lib.rs b/library/stdarch/crates/core_arch/src/lib.rs index ea9a6206728..440b0546dbd 100644 --- a/library/stdarch/crates/core_arch/src/lib.rs +++ b/library/stdarch/crates/core_arch/src/lib.rs @@ -45,7 +45,6 @@ clippy::cast_possible_truncation, clippy::cast_precision_loss, clippy::shadow_reuse, - clippy::cyclomatic_complexity, clippy::cognitive_complexity, clippy::similar_names, clippy::many_single_char_names diff --git a/library/stdarch/crates/core_arch/src/macros.rs b/library/stdarch/crates/core_arch/src/macros.rs index 74a01be7709..a636a0c0bcf 100644 --- a/library/stdarch/crates/core_arch/src/macros.rs +++ b/library/stdarch/crates/core_arch/src/macros.rs @@ -275,8 +275,7 @@ macro_rules! types { #[derive(Copy, Clone, Debug)] #[allow(non_camel_case_types)] #[repr(simd)] - #[cfg_attr(feature = "cargo-clippy", - allow(clippy::missing_inline_in_public_items))] + #[allow(clippy::missing_inline_in_public_items)] pub struct $name($($fields)*); )*) } diff --git a/library/stdarch/crates/core_arch/src/simd.rs b/library/stdarch/crates/core_arch/src/simd.rs index 542ec5bb69c..79c61b5170d 100644 --- a/library/stdarch/crates/core_arch/src/simd.rs +++ b/library/stdarch/crates/core_arch/src/simd.rs @@ -9,7 +9,7 @@ macro_rules! simd_ty { #[derive(Copy, Clone, Debug, PartialEq)] pub(crate) struct $id($(pub $elem_ty),*); - #[cfg_attr(feature = "cargo-clippy", allow(clippy::use_self))] + #[allow(clippy::use_self)] impl $id { #[inline] pub(crate) const fn new($($elem_name: $elem_ty),*) -> Self { @@ -41,7 +41,7 @@ macro_rules! 
simd_m_ty { #[derive(Copy, Clone, Debug, PartialEq)] pub(crate) struct $id($(pub $elem_ty),*); - #[cfg_attr(feature = "cargo-clippy", allow(clippy::use_self))] + #[allow(clippy::use_self)] impl $id { #[inline] const fn bool_to_internal(x: bool) -> $ety { diff --git a/library/stdarch/crates/core_arch/src/x86/adx.rs b/library/stdarch/crates/core_arch/src/x86/adx.rs index 36b733ae4e6..f5e7d4f2fb3 100644 --- a/library/stdarch/crates/core_arch/src/x86/adx.rs +++ b/library/stdarch/crates/core_arch/src/x86/adx.rs @@ -32,8 +32,7 @@ pub unsafe fn _addcarry_u32(c_in: u8, a: u32, b: u32, out: &mut u32) -> u8 { #[stable(feature = "simd_x86_adx", since = "1.33.0")] #[cfg(not(stage0))] pub unsafe fn _addcarryx_u32(c_in: u8, a: u32, b: u32, out: &mut u32) -> u8 { - let r = llvm_addcarryx_u32(c_in, a, b, out as *mut _ as *mut u8); - r + llvm_addcarryx_u32(c_in, a, b, out as *mut _ as *mut u8) } /// Adds unsigned 32-bit integers `a` and `b` with unsigned 8-bit carry-in `c_in` diff --git a/library/stdarch/crates/core_arch/src/x86/avx.rs b/library/stdarch/crates/core_arch/src/x86/avx.rs index c00e902473c..86db1b10af8 100644 --- a/library/stdarch/crates/core_arch/src/x86/avx.rs +++ b/library/stdarch/crates/core_arch/src/x86/avx.rs @@ -1452,7 +1452,7 @@ pub unsafe fn _mm256_permute2f128_si256(a: __m256i, b: __m256i, imm8: i32) -> __ #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vbroadcastss))] #[stable(feature = "simd_x86", since = "1.27.0")] -#[cfg_attr(feature = "cargo-clippy", allow(clippy::trivially_copy_pass_by_ref))] +#[allow(clippy::trivially_copy_pass_by_ref)] pub unsafe fn _mm256_broadcast_ss(f: &f32) -> __m256 { _mm256_set1_ps(*f) } @@ -1465,7 +1465,7 @@ pub unsafe fn _mm256_broadcast_ss(f: &f32) -> __m256 { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vbroadcastss))] #[stable(feature = "simd_x86", since = "1.27.0")] -#[cfg_attr(feature = "cargo-clippy", allow(clippy::trivially_copy_pass_by_ref))] +#[allow(clippy::trivially_copy_pass_by_ref)] pub unsafe fn _mm_broadcast_ss(f: &f32) -> __m128 { _mm_set1_ps(*f) } @@ -1478,7 +1478,7 @@ pub unsafe fn _mm_broadcast_ss(f: &f32) -> __m128 { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vbroadcastsd))] #[stable(feature = "simd_x86", since = "1.27.0")] -#[cfg_attr(feature = "cargo-clippy", allow(clippy::trivially_copy_pass_by_ref))] +#[allow(clippy::trivially_copy_pass_by_ref)] pub unsafe fn _mm256_broadcast_sd(f: &f64) -> __m256d { _mm256_set1_pd(*f) } @@ -1618,7 +1618,7 @@ pub unsafe fn _mm256_insert_epi32(a: __m256i, i: i32, index: i32) -> __m256i { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmovaps))] // FIXME vmovapd expected #[stable(feature = "simd_x86", since = "1.27.0")] -#[cfg_attr(feature = "cargo-clippy", allow(clippy::cast_ptr_alignment))] +#[allow(clippy::cast_ptr_alignment)] pub unsafe fn _mm256_load_pd(mem_addr: *const f64) -> __m256d { *(mem_addr as *const __m256d) } @@ -1633,7 +1633,7 @@ pub unsafe fn _mm256_load_pd(mem_addr: *const f64) -> __m256d { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmovaps))] // FIXME vmovapd expected #[stable(feature = "simd_x86", since = "1.27.0")] -#[cfg_attr(feature = "cargo-clippy", allow(clippy::cast_ptr_alignment))] +#[allow(clippy::cast_ptr_alignment)] pub unsafe fn _mm256_store_pd(mem_addr: *const f64, a: __m256d) { *(mem_addr as *mut __m256d) = a; } @@ -1648,7 +1648,7 @@ pub unsafe fn _mm256_store_pd(mem_addr: *const f64, a: __m256d) { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmovaps))] 
#[stable(feature = "simd_x86", since = "1.27.0")] -#[cfg_attr(feature = "cargo-clippy", allow(clippy::cast_ptr_alignment))] +#[allow(clippy::cast_ptr_alignment)] pub unsafe fn _mm256_load_ps(mem_addr: *const f32) -> __m256 { *(mem_addr as *const __m256) } @@ -1663,7 +1663,7 @@ pub unsafe fn _mm256_load_ps(mem_addr: *const f32) -> __m256 { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmovaps))] #[stable(feature = "simd_x86", since = "1.27.0")] -#[cfg_attr(feature = "cargo-clippy", allow(clippy::cast_ptr_alignment))] +#[allow(clippy::cast_ptr_alignment)] pub unsafe fn _mm256_store_ps(mem_addr: *const f32, a: __m256) { *(mem_addr as *mut __m256) = a; } @@ -1959,7 +1959,7 @@ pub unsafe fn _mm256_stream_si256(mem_addr: *mut __m256i, a: __m256i) { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmovntps))] // FIXME vmovntpd #[stable(feature = "simd_x86", since = "1.27.0")] -#[cfg_attr(feature = "cargo-clippy", allow(clippy::cast_ptr_alignment))] +#[allow(clippy::cast_ptr_alignment)] pub unsafe fn _mm256_stream_pd(mem_addr: *mut f64, a: __m256d) { intrinsics::nontemporal_store(mem_addr as *mut __m256d, a); } @@ -1974,7 +1974,7 @@ pub unsafe fn _mm256_stream_pd(mem_addr: *mut f64, a: __m256d) { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmovntps))] #[stable(feature = "simd_x86", since = "1.27.0")] -#[cfg_attr(feature = "cargo-clippy", allow(clippy::cast_ptr_alignment))] +#[allow(clippy::cast_ptr_alignment)] pub unsafe fn _mm256_stream_ps(mem_addr: *mut f32, a: __m256) { intrinsics::nontemporal_store(mem_addr as *mut __m256, a); } diff --git a/library/stdarch/crates/core_arch/src/x86/bswap.rs b/library/stdarch/crates/core_arch/src/x86/bswap.rs index 7d51082ff49..2896781f847 100644 --- a/library/stdarch/crates/core_arch/src/x86/bswap.rs +++ b/library/stdarch/crates/core_arch/src/x86/bswap.rs @@ -1,6 +1,5 @@ //! Byte swap intrinsics. - -#![cfg_attr(feature = "cargo-clippy", allow(clippy::stutter))] +#![allow(clippy::module_name_repetitions)] #[cfg(test)] use stdsimd_test::assert_instr; diff --git a/library/stdarch/crates/core_arch/src/x86/cpuid.rs b/library/stdarch/crates/core_arch/src/x86/cpuid.rs index 1c233e54601..c52157599c6 100644 --- a/library/stdarch/crates/core_arch/src/x86/cpuid.rs +++ b/library/stdarch/crates/core_arch/src/x86/cpuid.rs @@ -1,16 +1,12 @@ //! `cpuid` intrinsics - -#![cfg_attr(feature = "cargo-clippy", allow(clippy::module_name_repetitions))] +#![allow(clippy::module_name_repetitions)] #[cfg(test)] use stdsimd_test::assert_instr; /// Result of the `cpuid` instruction. -#[cfg_attr( - feature = "cargo-clippy", - // the derived impl of Debug for CpuidResult is not #[inline] and that's OK. - allow(clippy::missing_inline_in_public_items) -)] +#[allow(clippy::missing_inline_in_public_items)] +// ^^ the derived impl of Debug for CpuidResult is not #[inline] and that's OK. #[derive(Copy, Clone, Debug, Eq, Ord, PartialEq, PartialOrd)] #[stable(feature = "simd_x86", since = "1.27.0")] pub struct CpuidResult { diff --git a/library/stdarch/crates/core_arch/src/x86/rdrand.rs b/library/stdarch/crates/core_arch/src/x86/rdrand.rs index 49abc3d0497..a1252933522 100644 --- a/library/stdarch/crates/core_arch/src/x86/rdrand.rs +++ b/library/stdarch/crates/core_arch/src/x86/rdrand.rs @@ -1,8 +1,7 @@ //! RDRAND and RDSEED instructions for returning random numbers from an Intel //! on-chip hardware random number generator which has been seeded by an //! on-chip entropy source. 
- -#![cfg_attr(feature = "cargo-clippy", allow(clippy::module_name_repetitions))] +#![allow(clippy::module_name_repetitions)] #[allow(improper_ctypes)] extern "unadjusted" { diff --git a/library/stdarch/crates/core_arch/src/x86/sse.rs b/library/stdarch/crates/core_arch/src/x86/sse.rs index 03ea61c11ff..8075e266300 100644 --- a/library/stdarch/crates/core_arch/src/x86/sse.rs +++ b/library/stdarch/crates/core_arch/src/x86/sse.rs @@ -1209,7 +1209,7 @@ pub unsafe fn _mm_load_ps1(p: *const f32) -> __m128 { #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(movaps))] #[stable(feature = "simd_x86", since = "1.27.0")] -#[cfg_attr(feature = "cargo-clippy", allow(clippy::cast_ptr_alignment))] +#[allow(clippy::cast_ptr_alignment)] pub unsafe fn _mm_load_ps(p: *const f32) -> __m128 { *(p as *const __m128) } @@ -1370,7 +1370,7 @@ pub unsafe fn _mm_store_ss(p: *mut f32, a: __m128) { #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(movaps))] #[stable(feature = "simd_x86", since = "1.27.0")] -#[cfg_attr(feature = "cargo-clippy", allow(clippy::cast_ptr_alignment))] +#[allow(clippy::cast_ptr_alignment)] pub unsafe fn _mm_store1_ps(p: *mut f32, a: __m128) { let b: __m128 = simd_shuffle4(a, a, [0, 0, 0, 0]); *(p as *mut __m128) = b; @@ -1402,7 +1402,7 @@ pub unsafe fn _mm_store_ps1(p: *mut f32, a: __m128) { #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(movaps))] #[stable(feature = "simd_x86", since = "1.27.0")] -#[cfg_attr(feature = "cargo-clippy", allow(clippy::cast_ptr_alignment))] +#[allow(clippy::cast_ptr_alignment)] pub unsafe fn _mm_store_ps(p: *mut f32, a: __m128) { *(p as *mut __m128) = a; } @@ -1446,7 +1446,7 @@ pub unsafe fn _mm_storeu_ps(p: *mut f32, a: __m128) { #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(movaps))] #[stable(feature = "simd_x86", since = "1.27.0")] -#[cfg_attr(feature = "cargo-clippy", allow(clippy::cast_ptr_alignment))] +#[allow(clippy::cast_ptr_alignment)] pub unsafe fn _mm_storer_ps(p: *mut f32, a: __m128) { let b: __m128 = simd_shuffle4(a, a, [3, 2, 1, 0]); *(p as *mut __m128) = b; @@ -2033,7 +2033,7 @@ extern "C" { #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(movntps))] #[stable(feature = "simd_x86", since = "1.27.0")] -#[cfg_attr(feature = "cargo-clippy", allow(clippy::cast_ptr_alignment))] +#[allow(clippy::cast_ptr_alignment)] pub unsafe fn _mm_stream_ps(mem_addr: *mut f32, a: __m128) { intrinsics::nontemporal_store(mem_addr as *mut __m128, a); } diff --git a/library/stdarch/crates/core_arch/src/x86/sse2.rs b/library/stdarch/crates/core_arch/src/x86/sse2.rs index 6f8fd06fd5e..e2ce40e6313 100644 --- a/library/stdarch/crates/core_arch/src/x86/sse2.rs +++ b/library/stdarch/crates/core_arch/src/x86/sse2.rs @@ -2529,7 +2529,7 @@ pub unsafe fn _mm_movemask_pd(a: __m128d) -> i32 { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(movaps))] #[stable(feature = "simd_x86", since = "1.27.0")] -#[cfg_attr(feature = "cargo-clippy", allow(clippy::cast_ptr_alignment))] +#[allow(clippy::cast_ptr_alignment)] pub unsafe fn _mm_load_pd(mem_addr: *const f64) -> __m128d { *(mem_addr as *const __m128d) } @@ -2582,7 +2582,7 @@ pub unsafe fn _mm_loadl_pd(a: __m128d, mem_addr: *const f64) -> __m128d { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(movntps))] // FIXME movntpd #[stable(feature = "simd_x86", since = "1.27.0")] -#[cfg_attr(feature = "cargo-clippy", allow(clippy::cast_ptr_alignment))] +#[allow(clippy::cast_ptr_alignment)] pub unsafe fn _mm_stream_pd(mem_addr: *mut f64, a: 
__m128d) { intrinsics::nontemporal_store(mem_addr as *mut __m128d, a); } @@ -2608,7 +2608,7 @@ pub unsafe fn _mm_store_sd(mem_addr: *mut f64, a: __m128d) { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(movaps))] #[stable(feature = "simd_x86", since = "1.27.0")] -#[cfg_attr(feature = "cargo-clippy", allow(clippy::cast_ptr_alignment))] +#[allow(clippy::cast_ptr_alignment)] pub unsafe fn _mm_store_pd(mem_addr: *mut f64, a: __m128d) { *(mem_addr as *mut __m128d) = a; } @@ -2634,7 +2634,7 @@ pub unsafe fn _mm_storeu_pd(mem_addr: *mut f64, a: __m128d) { #[inline] #[target_feature(enable = "sse2")] #[stable(feature = "simd_x86", since = "1.27.0")] -#[cfg_attr(feature = "cargo-clippy", allow(clippy::cast_ptr_alignment))] +#[allow(clippy::cast_ptr_alignment)] pub unsafe fn _mm_store1_pd(mem_addr: *mut f64, a: __m128d) { let b: __m128d = simd_shuffle2(a, a, [0, 0]); *(mem_addr as *mut __m128d) = b; @@ -2648,7 +2648,7 @@ pub unsafe fn _mm_store1_pd(mem_addr: *mut f64, a: __m128d) { #[inline] #[target_feature(enable = "sse2")] #[stable(feature = "simd_x86", since = "1.27.0")] -#[cfg_attr(feature = "cargo-clippy", allow(clippy::cast_ptr_alignment))] +#[allow(clippy::cast_ptr_alignment)] pub unsafe fn _mm_store_pd1(mem_addr: *mut f64, a: __m128d) { let b: __m128d = simd_shuffle2(a, a, [0, 0]); *(mem_addr as *mut __m128d) = b; @@ -2663,7 +2663,7 @@ pub unsafe fn _mm_store_pd1(mem_addr: *mut f64, a: __m128d) { #[inline] #[target_feature(enable = "sse2")] #[stable(feature = "simd_x86", since = "1.27.0")] -#[cfg_attr(feature = "cargo-clippy", allow(clippy::cast_ptr_alignment))] +#[allow(clippy::cast_ptr_alignment)] pub unsafe fn _mm_storer_pd(mem_addr: *mut f64, a: __m128d) { let b: __m128d = simd_shuffle2(a, a, [1, 0]); *(mem_addr as *mut __m128d) = b; diff --git a/library/stdarch/crates/core_arch/src/x86/xsave.rs b/library/stdarch/crates/core_arch/src/x86/xsave.rs index 19a9dcbb287..f707caca199 100644 --- a/library/stdarch/crates/core_arch/src/x86/xsave.rs +++ b/library/stdarch/crates/core_arch/src/x86/xsave.rs @@ -1,6 +1,5 @@ //! `i586`'s `xsave` and `xsaveopt` target feature intrinsics - -#![cfg_attr(feature = "cargo-clippy", allow(clippy::module_name_repetitions))] +#![allow(clippy::module_name_repetitions)] #[cfg(test)] use stdsimd_test::assert_instr; diff --git a/library/stdarch/crates/core_arch/src/x86_64/bswap.rs b/library/stdarch/crates/core_arch/src/x86_64/bswap.rs index 6a94a42fc60..08bf1d2f87b 100644 --- a/library/stdarch/crates/core_arch/src/x86_64/bswap.rs +++ b/library/stdarch/crates/core_arch/src/x86_64/bswap.rs @@ -1,6 +1,6 @@ //! Byte swap intrinsics. -#![cfg_attr(feature = "cargo-clippy", allow(clippy::module_name_repetitions))] +#![allow(clippy::module_name_repetitions)] #[cfg(test)] use stdsimd_test::assert_instr; diff --git a/library/stdarch/crates/core_arch/src/x86_64/rdrand.rs b/library/stdarch/crates/core_arch/src/x86_64/rdrand.rs index 40b781d4062..7b6d0de01f8 100644 --- a/library/stdarch/crates/core_arch/src/x86_64/rdrand.rs +++ b/library/stdarch/crates/core_arch/src/x86_64/rdrand.rs @@ -2,7 +2,7 @@ //! on-chip hardware random number generator which has been seeded by an //! on-chip entropy source. 
-#![cfg_attr(feature = "cargo-clippy", allow(clippy::module_name_repetitions))] +#![allow(clippy::module_name_repetitions)] #[allow(improper_ctypes)] extern "unadjusted" { diff --git a/library/stdarch/crates/core_arch/src/x86_64/xsave.rs b/library/stdarch/crates/core_arch/src/x86_64/xsave.rs index 703dcde449f..964a24d12e2 100644 --- a/library/stdarch/crates/core_arch/src/x86_64/xsave.rs +++ b/library/stdarch/crates/core_arch/src/x86_64/xsave.rs @@ -1,6 +1,6 @@ //! `x86_64`'s `xsave` and `xsaveopt` target feature intrinsics -#![cfg_attr(feature = "cargo-clippy", allow(clippy::module_name_repetitions))] +#![allow(clippy::module_name_repetitions)] #[cfg(test)] use stdsimd_test::assert_instr; diff --git a/library/stdarch/crates/core_arch/tests/cpu-detection.rs b/library/stdarch/crates/core_arch/tests/cpu-detection.rs index 454176b18c4..9a7c999a18b 100644 --- a/library/stdarch/crates/core_arch/tests/cpu-detection.rs +++ b/library/stdarch/crates/core_arch/tests/cpu-detection.rs @@ -1,9 +1,6 @@ #![feature(stdsimd)] #![cfg_attr(stdsimd_strict, deny(warnings))] -#![cfg_attr( - feature = "cargo-clippy", - allow(clippy::option_unwrap_used, clippy::print_stdout, clippy::use_debug) -)] +#![allow(clippy::option_unwrap_used, clippy::print_stdout, clippy::use_debug)] #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] #[macro_use] diff --git a/library/stdarch/crates/std_detect/src/detect/cache.rs b/library/stdarch/crates/std_detect/src/detect/cache.rs index 422bd6817da..92bc4b58d16 100644 --- a/library/stdarch/crates/std_detect/src/detect/cache.rs +++ b/library/stdarch/crates/std_detect/src/detect/cache.rs @@ -30,7 +30,7 @@ const CACHE_CAPACITY: u32 = 63; #[derive(Copy, Clone)] pub(crate) struct Initializer(u64); -#[cfg_attr(feature = "cargo-clippy", allow(clippy::use_self))] +#[allow(clippy::use_self)] impl Default for Initializer { fn default() -> Self { Initializer(0) @@ -76,13 +76,10 @@ static CACHE: Cache = Cache::uninitialized(); struct Cache(AtomicU64); #[cfg(target_pointer_width = "64")] -#[cfg_attr(feature = "cargo-clippy", allow(clippy::use_self))] +#[allow(clippy::use_self)] impl Cache { /// Creates an uninitialized cache. 
- #[cfg_attr( - feature = "cargo-clippy", - allow(clippy::declare_interior_mutable_const) - )] + #[allow(clippy::declare_interior_mutable_const)] const fn uninitialized() -> Self { Cache(AtomicU64::new(u64::max_value())) } diff --git a/library/stdarch/crates/std_detect/src/detect/os/x86.rs b/library/stdarch/crates/std_detect/src/detect/os/x86.rs index 675601536fa..db8aa261188 100644 --- a/library/stdarch/crates/std_detect/src/detect/os/x86.rs +++ b/library/stdarch/crates/std_detect/src/detect/os/x86.rs @@ -30,7 +30,7 @@ pub fn check_for(x: Feature) -> bool { /// [wiki_cpuid]: https://en.wikipedia.org/wiki/CPUID /// [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf /// [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf -#[cfg_attr(feature = "cargo-clippy", allow(clippy::similar_names))] +#[allow(clippy::similar_names)] fn detect_features() -> cache::Initializer { let mut value = cache::Initializer::default(); diff --git a/library/stdarch/crates/std_detect/tests/cpu-detection.rs b/library/stdarch/crates/std_detect/tests/cpu-detection.rs index 85beeee63ac..b2b8abb0102 100644 --- a/library/stdarch/crates/std_detect/tests/cpu-detection.rs +++ b/library/stdarch/crates/std_detect/tests/cpu-detection.rs @@ -1,9 +1,6 @@ #![feature(stdsimd)] #![cfg_attr(stdsimd_strict, deny(warnings))] -#![cfg_attr( - feature = "cargo-clippy", - allow(clippy::option_unwrap_used, clippy::use_debug, clippy::print_stdout) -)] +#![allow(clippy::option_unwrap_used, clippy::use_debug, clippy::print_stdout)] #[cfg(any( target_arch = "arm", diff --git a/library/stdarch/crates/std_detect/tests/macro_trailing_commas.rs b/library/stdarch/crates/std_detect/tests/macro_trailing_commas.rs index d63da6af06a..df03e6555aa 100644 --- a/library/stdarch/crates/std_detect/tests/macro_trailing_commas.rs +++ b/library/stdarch/crates/std_detect/tests/macro_trailing_commas.rs @@ -1,9 +1,6 @@ #![feature(stdsimd)] #![cfg_attr(stdsimd_strict, deny(warnings))] -#![cfg_attr( - feature = "cargo-clippy", - allow(clippy::option_unwrap_used, clippy::use_debug, clippy::print_stdout) -)] +#![allow(clippy::option_unwrap_used, clippy::use_debug, clippy::print_stdout)] #[cfg(any( target_arch = "arm", diff --git a/library/stdarch/crates/stdsimd-test/src/disassembly.rs b/library/stdarch/crates/stdsimd-test/src/disassembly.rs index 3882207080b..1aee27a9e4f 100644 --- a/library/stdarch/crates/stdsimd-test/src/disassembly.rs +++ b/library/stdarch/crates/stdsimd-test/src/disassembly.rs @@ -103,7 +103,7 @@ fn parse_objdump(output: &str) -> HashMap> { && usize::from_str_radix(s, 16).is_ok() }) .skip_while(|s| *s == "lock") // skip x86-specific prefix - .map(|s| s.to_string()) + .map(std::string::ToString::to_string) .collect::>(); instructions.push(Instruction { parts }); } @@ -148,7 +148,7 @@ fn parse_otool(output: &str) -> HashMap> { let parts = instruction .split_whitespace() .skip(1) - .map(|s| s.to_string()) + .map(std::string::ToString::to_string) .collect::>(); instructions.push(Instruction { parts }); } @@ -199,7 +199,7 @@ fn parse_dumpbin(output: &str) -> HashMap> { .skip(1) .skip_while(|s| { s.len() == 2 && usize::from_str_radix(s, 16).is_ok() - }).map(|s| s.to_string()) + }).map(std::string::ToString::to_string) .skip_while(|s| *s == "lock") // skip x86-specific prefix .collect::>(); instructions.push(Instruction { parts }); diff --git a/library/stdarch/crates/stdsimd-test/src/lib.rs b/library/stdarch/crates/stdsimd-test/src/lib.rs 
index d20d3811ff2..a405412b427 100644 --- a/library/stdarch/crates/stdsimd-test/src/lib.rs +++ b/library/stdarch/crates/stdsimd-test/src/lib.rs @@ -61,10 +61,10 @@ fn normalize(symbol: &str) -> String { }; // Normalize to no leading underscore to handle platforms that may // inject extra ones in symbol names. - while ret.starts_with("_") { + while ret.starts_with('_') { ret.remove(0); } - return ret; + ret } /// Main entry point for this crate, called by the `#[assert_instr]` macro. diff --git a/library/stdarch/crates/stdsimd-verify/src/lib.rs b/library/stdarch/crates/stdsimd-verify/src/lib.rs index 284b35dc13b..e832e23d4e9 100644 --- a/library/stdarch/crates/stdsimd-verify/src/lib.rs +++ b/library/stdarch/crates/stdsimd-verify/src/lib.rs @@ -243,17 +243,17 @@ fn walk(root: &Path, files: &mut Vec<(syn::File, String)>) { continue; } let path = file.path(); - if path.extension().and_then(|s| s.to_str()) != Some("rs") { + if path.extension().and_then(std::ffi::OsStr::to_str) != Some("rs") { continue; } - if path.file_name().and_then(|s| s.to_str()) == Some("test.rs") { + if path.file_name().and_then(std::ffi::OsStr::to_str) == Some("test.rs") { continue; } let mut contents = String::new(); File::open(&path) - .expect(&format!("can't open file at path: {}", path.display())) + .unwrap_or_else(|_| panic!("can't open file at path: {}", path.display())) .read_to_string(&mut contents) .expect("failed to read file to string"); @@ -284,8 +284,8 @@ fn find_instrs(attrs: &[syn::Attribute]) -> Vec { fn parse(content: syn::parse::ParseStream) -> syn::parse::Result { let input; parenthesized!(input in content); - drop(input.parse::()?); - drop(input.parse::()?); + let _ = input.parse::()?; + let _ = input.parse::()?; let ident = input.parse::()?; if ident != "assert_instr" { return Err(input.error("expected `assert_instr`")); @@ -358,7 +358,7 @@ struct RustcArgsRequiredConst { } impl syn::parse::Parse for RustcArgsRequiredConst { - #[cfg_attr(feature = "cargo-clippy", allow(clippy::cast_possible_truncation))] + #[allow(clippy::cast_possible_truncation)] fn parse(input: syn::parse::ParseStream) -> syn::parse::Result { let content; parenthesized!(content in input); diff --git a/library/stdarch/examples/hex.rs b/library/stdarch/examples/hex.rs index 8b9a4cdd0d0..7589578517e 100644 --- a/library/stdarch/examples/hex.rs +++ b/library/stdarch/examples/hex.rs @@ -14,18 +14,15 @@ #![feature(stdsimd)] #![cfg_attr(test, feature(test))] -#![cfg_attr( - feature = "cargo-clippy", - allow( - clippy::result_unwrap_used, - clippy::print_stdout, - clippy::option_unwrap_used, - clippy::shadow_reuse, - clippy::cast_possible_wrap, - clippy::cast_ptr_alignment, - clippy::cast_sign_loss, - clippy::missing_docs_in_private_items - ) +#![allow( + clippy::result_unwrap_used, + clippy::print_stdout, + clippy::option_unwrap_used, + clippy::shadow_reuse, + clippy::cast_possible_wrap, + clippy::cast_ptr_alignment, + clippy::cast_sign_loss, + clippy::missing_docs_in_private_items )] #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] -- cgit 1.4.1-3-g733a5 From 26d6e048cc2642a5a227439d99066d8f2ff06bc4 Mon Sep 17 00:00:00 2001 From: tyler Date: Wed, 17 Apr 2019 20:18:21 -0700 Subject: add rtm cpu feature intrinsics --- library/stdarch/crates/core_arch/src/lib.rs | 1 + library/stdarch/crates/core_arch/src/x86/mod.rs | 3 + library/stdarch/crates/core_arch/src/x86/rtm.rs | 151 +++++++++++++++++++++ .../crates/std_detect/src/detect/arch/x86.rs | 8 ++ .../stdarch/crates/std_detect/src/detect/os/x86.rs | 6 + 5 files changed, 169 
insertions(+) create mode 100644 library/stdarch/crates/core_arch/src/x86/rtm.rs (limited to 'library/stdarch/crates/std_detect/src') diff --git a/library/stdarch/crates/core_arch/src/lib.rs b/library/stdarch/crates/core_arch/src/lib.rs index f57d1da9641..d936d0cc1cd 100644 --- a/library/stdarch/crates/core_arch/src/lib.rs +++ b/library/stdarch/crates/core_arch/src/lib.rs @@ -32,6 +32,7 @@ wasm_target_feature, abi_unadjusted, adx_target_feature, + rtm_target_feature, external_doc )] #![cfg_attr(test, feature(test, abi_vectorcall, untagged_unions))] diff --git a/library/stdarch/crates/core_arch/src/x86/mod.rs b/library/stdarch/crates/core_arch/src/x86/mod.rs index 5f28aa8836d..ad254f91a51 100644 --- a/library/stdarch/crates/core_arch/src/x86/mod.rs +++ b/library/stdarch/crates/core_arch/src/x86/mod.rs @@ -565,3 +565,6 @@ pub use self::avx512ifma::*; mod bt; pub use self::bt::*; + +mod rtm; +pub use self::rtm::*; \ No newline at end of file diff --git a/library/stdarch/crates/core_arch/src/x86/rtm.rs b/library/stdarch/crates/core_arch/src/x86/rtm.rs new file mode 100644 index 00000000000..d03ba7b82b0 --- /dev/null +++ b/library/stdarch/crates/core_arch/src/x86/rtm.rs @@ -0,0 +1,151 @@ +#[cfg(test)] +use stdsimd_test::assert_instr; + +extern "C" { + #[link_name = "llvm.x86.xbegin"] + fn x86_xbegin() -> i32; + #[link_name = "llvm.x86.xend"] + fn x86_xend() -> (); + #[link_name = "llvm.x86.xabort"] + fn x86_xabort(imm8: i8) -> (); + #[link_name = "llvm.x86.xtest"] + fn x86_xtest() -> i32; +} + +/// Transaction successfully started. +pub const _XBEGIN_STARTED: u32 = !0; + +/// Transaction explicitly aborted with xabort. The parameter passed to xabort is available with +/// _xabort_code(status). +pub const _XABORT_EXPLICIT: u32 = 1 << 0; + +/// Transaction retry is possible. +pub const _XABORT_RETRY: u32 = 1 << 1; + +/// Transaction abort due to a memory conflict with another thread. +pub const _XABORT_CONFLICT: u32 = 1 << 2; + +/// Transaction abort due to the transaction using too much memory. +pub const _XABORT_CAPACITY: u32 = 1 << 3; + +/// Transaction abort due to a debug trap. +pub const _XABORT_DEBUG: u32 = 1 << 4; + +/// Transaction abort in a inner nested transaction. +pub const _XABORT_NESTED: u32 = 1 << 5; + +/// Specifies the start of a restricted transactional memory (RTM) code region and returns a value +/// indicating status. +/// +/// [Intel's documentation](https://software.intel.com/en-us/cpp-compiler-developer-guide-and-reference-xbegin). +#[inline] +#[target_feature(enable = "rtm")] +#[cfg_attr(test, assert_instr(xbegin))] +pub unsafe fn _xbegin() -> u32 { + x86_xbegin() as _ +} + +/// Specifies the end of a restricted transactional memory (RTM) code region. +/// +/// [Intel's documentation](https://software.intel.com/en-us/cpp-compiler-developer-guide-and-reference-xend). +#[inline] +#[target_feature(enable = "rtm")] +#[cfg_attr(test, assert_instr(xend))] +pub unsafe fn _xend() { + x86_xend() +} + +/// Forces a restricted transactional memory (RTM) region to abort. +/// +/// [Intel's documentation](https://software.intel.com/en-us/cpp-compiler-developer-guide-and-reference-xabort). +#[inline] +#[target_feature(enable = "rtm")] +#[cfg_attr(test, assert_instr(xabort))] +pub unsafe fn _xabort(imm8: u32) { + macro_rules! call { + ($imm8:expr) => { + x86_xabort($imm8) + }; + } + constify_imm8!(imm8, call) +} + +/// Queries whether the processor is executing in a transactional region identified by restricted +/// transactional memory (RTM) or hardware lock elision (HLE). 
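As an illustrative sketch (not part of the patch itself), the RTM intrinsics above typically compose into a bounded retry loop with a non-transactional fallback. The function and parameter names below are placeholders, the paths assume the `core::arch::x86_64` re-export these intrinsics land under, and callers would normally check `is_x86_feature_detected!("rtm")` before taking this path:

```rust
// Usage sketch, not patch content: retry a transaction a few times, then fall
// back to a conventional (e.g. lock-based) path.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "rtm")]
unsafe fn run_transactionally(work: fn(), fallback: fn()) {
    use core::arch::x86_64::{_xbegin, _xend, _XBEGIN_STARTED};
    for _ in 0..3 {
        let status = _xbegin();
        if status == _XBEGIN_STARTED {
            // Inside the transaction: all memory effects commit atomically at `_xend`.
            work();
            _xend();
            return;
        }
        // Aborted: `status` carries the `_XABORT_*` flags defined above; retry.
    }
    // The transaction kept aborting; take the conventional path instead.
    fallback();
}
```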
+/// +/// [Intel's documentation](https://software.intel.com/en-us/cpp-compiler-developer-guide-and-reference-xtest). +#[inline] +#[target_feature(enable = "rtm")] +#[cfg_attr(test, assert_instr(xtest))] +pub unsafe fn _xtest() -> bool { + x86_xtest() != 0 +} + +/// Retrieves the parameter passed to [`_xabort`] when [`_xbegin`]'s status has the `_XABORT_EXPLICIT` flag set. +#[inline] +pub fn _xabort_code(status: u32) -> u32 { + (status >> 24) & 0xFF +} + +#[cfg(test)] +mod tests { + use crate::core_arch::x86::*; + + #[test] + fn test_xbegin_xend() { + unsafe { + let mut x = 0; + for _ in 0..10 { + let code = rtm::_xbegin(); + if code == _XBEGIN_STARTED { + x += 1; + rtm::_xend(); + assert_eq!(x, 1); + break + } + assert_eq!(x, 0); + } + } + } + + #[test] + fn test_xabort() { + unsafe { + // aborting with outside a transactional region does nothing + _xabort(0); + + for abort_code in 0..10 { + let mut x = 0; + let code = rtm::_xbegin(); + if code == _XBEGIN_STARTED { + x += 1; + rtm::_xabort(abort_code); + } else if code & _XABORT_EXPLICIT != 0 { + let test_abort_code = rtm::_xabort_code(code); + assert_eq!(test_abort_code, abort_code); + } + assert_eq!(x, 0); + } + } + } + + #[test] + fn test_xtest() { + unsafe { + assert_eq!(_xtest(), false); + + for _ in 0..10 { + let code = rtm::_xbegin(); + if code == _XBEGIN_STARTED { + let in_tx = _xtest(); + rtm::_xend(); + + // putting the assert inside the transaction would abort the transaction on fail + // without any output/panic/etc + assert_eq!(in_tx, true); + break + } + } + } + } +} \ No newline at end of file diff --git a/library/stdarch/crates/std_detect/src/detect/arch/x86.rs b/library/stdarch/crates/std_detect/src/detect/arch/x86.rs index 580d7111bdd..45f2d5bfc87 100644 --- a/library/stdarch/crates/std_detect/src/detect/arch/x86.rs +++ b/library/stdarch/crates/std_detect/src/detect/arch/x86.rs @@ -74,6 +74,8 @@ /// * `"xsaveopt"` /// * `"xsaves"` /// * `"xsavec"` +/// * `"adx"` +/// * `"rtm"` /// /// [docs]: https://software.intel.com/sites/landingpage/IntrinsicsGuide #[macro_export] @@ -233,6 +235,10 @@ macro_rules! 
is_x86_feature_detected { cfg!(target_feature = "adx") || $crate::detect::check_for( $crate::detect::Feature::adx) }; + ("rtm") => { + cfg!(target_feature = "rtm") || $crate::detect::check_for( + $crate::detect::Feature::rtm) + }; ($t:tt,) => { is_x86_feature_detected!($t); }; @@ -330,4 +336,6 @@ pub enum Feature { cmpxchg16b, /// ADX, Intel ADX (Multi-Precision Add-Carry Instruction Extensions) adx, + /// RTM, Intel (Restricted Transactional Memory) + rtm, } diff --git a/library/stdarch/crates/std_detect/src/detect/os/x86.rs b/library/stdarch/crates/std_detect/src/detect/os/x86.rs index db8aa261188..ab0622106c8 100644 --- a/library/stdarch/crates/std_detect/src/detect/os/x86.rs +++ b/library/stdarch/crates/std_detect/src/detect/os/x86.rs @@ -123,6 +123,7 @@ fn detect_features() -> cache::Initializer { enable(proc_info_ecx, 30, Feature::rdrand); enable(extended_features_ebx, 18, Feature::rdseed); enable(extended_features_ebx, 19, Feature::adx); + enable(extended_features_ebx, 11, Feature::rtm); enable(proc_info_edx, 4, Feature::tsc); enable(proc_info_edx, 23, Feature::mmx); enable(proc_info_edx, 24, Feature::fxsr); @@ -290,6 +291,7 @@ mod tests { println!("xsavec: {:?}", is_x86_feature_detected!("xsavec")); println!("cmpxchg16b: {:?}", is_x86_feature_detected!("cmpxchg16b")); println!("adx: {:?}", is_x86_feature_detected!("adx")); + println!("rtm: {:?}", is_x86_feature_detected!("rtm")); } #[test] @@ -354,5 +356,9 @@ mod tests { is_x86_feature_detected!("adx"), information.adx(), ); + assert_eq!( + is_x86_feature_detected!("rtm"), + information.rtm(), + ); } } -- cgit 1.4.1-3-g733a5 From d31cc0b09e14f33e888f1bb83bd88c6147ba6fcc Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Tue, 23 Apr 2019 10:10:41 +0200 Subject: Add runtime feature detection for F16C --- library/stdarch/crates/core_arch/src/x86/f16c.rs | 5 +++-- library/stdarch/crates/core_arch/tests/cpu-detection.rs | 1 + library/stdarch/crates/std_detect/src/detect/arch/x86.rs | 7 +++++++ library/stdarch/crates/std_detect/src/detect/os/x86.rs | 3 ++- library/stdarch/crates/std_detect/tests/cpu-detection.rs | 1 + 5 files changed, 14 insertions(+), 3 deletions(-) (limited to 'library/stdarch/crates/std_detect/src') diff --git a/library/stdarch/crates/core_arch/src/x86/f16c.rs b/library/stdarch/crates/core_arch/src/x86/f16c.rs index 496e3a70671..597d86b2d04 100644 --- a/library/stdarch/crates/core_arch/src/x86/f16c.rs +++ b/library/stdarch/crates/core_arch/src/x86/f16c.rs @@ -1,5 +1,6 @@ -//! F16C intrinsics: -//! https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=fp16&expand=1769 +//! [F16C intrinsics]. +//! +//! 
[F16C intrinsics]: https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=fp16&expand=1769 use crate::{ core_arch::{simd::*, x86::*}, diff --git a/library/stdarch/crates/core_arch/tests/cpu-detection.rs b/library/stdarch/crates/core_arch/tests/cpu-detection.rs index 9a7c999a18b..321f24e9fc2 100644 --- a/library/stdarch/crates/core_arch/tests/cpu-detection.rs +++ b/library/stdarch/crates/core_arch/tests/cpu-detection.rs @@ -31,6 +31,7 @@ fn x86_all() { "avx512_vpopcntdq {:?}", is_x86_feature_detected!("avx512vpopcntdq") ); + println!("f16c: {:?}", is_x86_feature_detected!("f16c")); println!("fma: {:?}", is_x86_feature_detected!("fma")); println!("abm: {:?}", is_x86_feature_detected!("abm")); println!("bmi: {:?}", is_x86_feature_detected!("bmi1")); diff --git a/library/stdarch/crates/std_detect/src/detect/arch/x86.rs b/library/stdarch/crates/std_detect/src/detect/arch/x86.rs index 45f2d5bfc87..da14ce5cf0f 100644 --- a/library/stdarch/crates/std_detect/src/detect/arch/x86.rs +++ b/library/stdarch/crates/std_detect/src/detect/arch/x86.rs @@ -62,6 +62,7 @@ /// * `"avx512ifma"` /// * `"avx512vbmi"` /// * `"avx512vpopcntdq"` +/// * `"f16c"` /// * `"fma"` /// * `"bmi1"` /// * `"bmi2"` @@ -179,6 +180,10 @@ macro_rules! is_x86_feature_detected { cfg!(target_feature = "avx512vpopcntdq") || $crate::detect::check_for( $crate::detect::Feature::avx512_vpopcntdq) }; + ("f16c") => { + cfg!(target_feature = "avx512f") || $crate::detect::check_for( + $crate::detect::Feature::f16c) + }; ("fma") => { cfg!(target_feature = "fma") || $crate::detect::check_for( $crate::detect::Feature::fma) @@ -309,6 +314,8 @@ pub enum Feature { /// AVX-512 VPOPCNTDQ (Vector Population Count Doubleword and /// Quadword) avx512_vpopcntdq, + /// F16C (Conversions between IEEE-754 `binary16` and `binary32` formats) + f16c, /// FMA (Fused Multiply Add) fma, /// BMI1 (Bit Manipulation Instructions 1) diff --git a/library/stdarch/crates/std_detect/src/detect/os/x86.rs b/library/stdarch/crates/std_detect/src/detect/os/x86.rs index ab0622106c8..e543d301c79 100644 --- a/library/stdarch/crates/std_detect/src/detect/os/x86.rs +++ b/library/stdarch/crates/std_detect/src/detect/os/x86.rs @@ -113,13 +113,14 @@ fn detect_features() -> cache::Initializer { }; enable(proc_info_ecx, 0, Feature::sse3); + enable(proc_info_ecx, 1, Feature::pclmulqdq); enable(proc_info_ecx, 9, Feature::ssse3); enable(proc_info_ecx, 13, Feature::cmpxchg16b); enable(proc_info_ecx, 19, Feature::sse4_1); enable(proc_info_ecx, 20, Feature::sse4_2); enable(proc_info_ecx, 23, Feature::popcnt); enable(proc_info_ecx, 25, Feature::aes); - enable(proc_info_ecx, 1, Feature::pclmulqdq); + enable(proc_info_ecx, 29, Feature::f16c); enable(proc_info_ecx, 30, Feature::rdrand); enable(extended_features_ebx, 18, Feature::rdseed); enable(extended_features_ebx, 19, Feature::adx); diff --git a/library/stdarch/crates/std_detect/tests/cpu-detection.rs b/library/stdarch/crates/std_detect/tests/cpu-detection.rs index b2b8abb0102..0aae39e2947 100644 --- a/library/stdarch/crates/std_detect/tests/cpu-detection.rs +++ b/library/stdarch/crates/std_detect/tests/cpu-detection.rs @@ -87,6 +87,7 @@ fn x86_all() { "avx512_vpopcntdq {:?}", is_x86_feature_detected!("avx512vpopcntdq") ); + println!("f16c: {:?}", is_x86_feature_detected!("f16c")); println!("fma: {:?}", is_x86_feature_detected!("fma")); println!("bmi1: {:?}", is_x86_feature_detected!("bmi1")); println!("bmi2: {:?}", is_x86_feature_detected!("bmi2")); -- cgit 1.4.1-3-g733a5 From 6d59dc14abd82281e756d67636aeaac4771eb3a9 Mon Sep 17 
00:00:00 2001 From: gnzlbg Date: Wed, 24 Apr 2019 16:24:24 +0200 Subject: Update f16c intrinsics to use the f16c target feature --- library/stdarch/crates/core_arch/src/x86/f16c.rs | 12 ++++++------ library/stdarch/crates/std_detect/src/detect/arch/x86.rs | 2 +- library/stdarch/crates/stdsimd-verify/tests/x86-intel.rs | 7 +++---- 3 files changed, 10 insertions(+), 11 deletions(-) (limited to 'library/stdarch/crates/std_detect/src') diff --git a/library/stdarch/crates/core_arch/src/x86/f16c.rs b/library/stdarch/crates/core_arch/src/x86/f16c.rs index 597d86b2d04..195485914b3 100644 --- a/library/stdarch/crates/core_arch/src/x86/f16c.rs +++ b/library/stdarch/crates/core_arch/src/x86/f16c.rs @@ -27,7 +27,7 @@ extern "unadjusted" { /// the 128-bit vector `a` into 4 x 32-bit float values stored in a 128-bit wide /// vector. #[inline] -#[target_feature(enable = "avx512f")] +#[target_feature(enable = "f16c")] #[cfg_attr(test, assert_instr("vcvtph2ps"))] pub unsafe fn _mm_cvtph_ps(a: __m128i) -> __m128 { transmute(llvm_vcvtph2ps_128(transmute(a))) @@ -36,7 +36,7 @@ pub unsafe fn _mm_cvtph_ps(a: __m128i) -> __m128 { /// Converts the 8 x 16-bit half-precision float values in the 128-bit vector /// `a` into 8 x 32-bit float values stored in a 256-bit wide vector. #[inline] -#[target_feature(enable = "avx512f")] +#[target_feature(enable = "f16c")] #[cfg_attr(test, assert_instr("vcvtph2ps"))] pub unsafe fn _mm256_cvtph_ps(a: __m128i) -> __m256 { transmute(llvm_vcvtph2ps_256(transmute(a))) @@ -70,7 +70,7 @@ macro_rules! dispatch_rounding { /// * `_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC`: truncate and suppress exceptions, /// * `_MM_FROUND_CUR_DIRECTION`: use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]. #[inline] -#[target_feature(enable = "avx512f")] +#[target_feature(enable = "f16c")] #[rustc_args_required_const(1)] #[cfg_attr(test, assert_instr("vcvtps2ph", imm_rounding = 0))] pub unsafe fn _mm_cvtps_ph(a: __m128, imm_rounding: i32) -> __m128i { @@ -94,7 +94,7 @@ pub unsafe fn _mm_cvtps_ph(a: __m128, imm_rounding: i32) -> __m128i { /// * `_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC`: truncate and suppress exceptions, /// * `_MM_FROUND_CUR_DIRECTION`: use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]. #[inline] -#[target_feature(enable = "avx512f")] +#[target_feature(enable = "f16c")] #[rustc_args_required_const(1)] #[cfg_attr(test, assert_instr("vcvtps2ph", imm_rounding = 0))] pub unsafe fn _mm256_cvtps_ph(a: __m256, imm_rounding: i32) -> __m128i { @@ -112,7 +112,7 @@ mod tests { use crate::{core_arch::x86::*, mem::transmute}; use stdsimd_test::simd_test; - #[simd_test(enable = "avx512f")] + #[simd_test(enable = "f16c")] unsafe fn test_mm_cvtph_ps() { let array = [1_f32, 2_f32, 3_f32, 4_f32]; let float_vec: __m128 = transmute(array); @@ -122,7 +122,7 @@ mod tests { assert_eq!(result, array); } - #[simd_test(enable = "avx512f")] + #[simd_test(enable = "f16c")] unsafe fn test_mm256_cvtph_ps() { let array = [1_f32, 2_f32, 3_f32, 4_f32, 5_f32, 6_f32, 7_f32, 8_f32]; let float_vec: __m256 = transmute(array); diff --git a/library/stdarch/crates/std_detect/src/detect/arch/x86.rs b/library/stdarch/crates/std_detect/src/detect/arch/x86.rs index da14ce5cf0f..50d5cfa87c4 100644 --- a/library/stdarch/crates/std_detect/src/detect/arch/x86.rs +++ b/library/stdarch/crates/std_detect/src/detect/arch/x86.rs @@ -181,7 +181,7 @@ macro_rules! 
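As a usage sketch (not part of the patch), the conversion intrinsics updated above can round-trip `f32` data through `binary16` once run-time detection reports F16C support. The two-argument `_mm_cvtps_ph` call and the rounding flags follow the signatures shown in this commit; the function name is a placeholder:

```rust
// Usage sketch, not patch content: guarded f32 -> f16 -> f32 round trip.
#[cfg(target_arch = "x86_64")]
fn round_trip_f16(values: [f32; 4]) -> [f32; 4] {
    if is_x86_feature_detected!("f16c") {
        unsafe {
            use core::arch::x86_64::*;
            let v = _mm_loadu_ps(values.as_ptr());
            // Round to nearest and suppress exceptions (see the rounding-mode list above).
            let packed = _mm_cvtps_ph(v, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
            let unpacked = _mm_cvtph_ps(packed);
            let mut out = [0.0_f32; 4];
            _mm_storeu_ps(out.as_mut_ptr(), unpacked);
            return out;
        }
    }
    // No F16C at run time: this sketch just returns the input unchanged.
    values
}
```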
is_x86_feature_detected { $crate::detect::Feature::avx512_vpopcntdq) }; ("f16c") => { - cfg!(target_feature = "avx512f") || $crate::detect::check_for( + cfg!(target_feature = "f16c") || $crate::detect::check_for( $crate::detect::Feature::f16c) }; ("fma") => { diff --git a/library/stdarch/crates/stdsimd-verify/tests/x86-intel.rs b/library/stdarch/crates/stdsimd-verify/tests/x86-intel.rs index 49799a5854a..fe6d8014400 100644 --- a/library/stdarch/crates/stdsimd-verify/tests/x86-intel.rs +++ b/library/stdarch/crates/stdsimd-verify/tests/x86-intel.rs @@ -299,10 +299,9 @@ fn matches(rust: &Function, intel: &Intrinsic) -> Result<(), String> { // it "avx512ifma". "avx512ifma52" => String::from("avx512ifma"), // See: https://github.com/rust-lang-nursery/stdsimd/issues/738 - // FIXME: we need to fix "fp16c" to "f16c" here. Since - // https://github.com/rust-lang/rust/pull/60191 is not merged, - // we temporarily map it to "avx512f". - "fp16c" => String::from("avx512f"), + // The intrinsics guide calls `f16c` `fp16c` in disagreement with + // Intel's architecture manuals. + "fp16c" => String::from("f16c"), _ => cpuid, }; let fixed_cpuid = fixup_cpuid(cpuid); -- cgit 1.4.1-3-g733a5 From a62067658ddcdbd085be256e3800cabd267fc79b Mon Sep 17 00:00:00 2001 From: miki Date: Fri, 26 Apr 2019 17:16:12 +0200 Subject: Add std_detect for FreeBSD armv6, armv7 and powerpc64 --- .../crates/std_detect/src/detect/os/freebsd/arm.rs | 27 +++++++ .../std_detect/src/detect/os/freebsd/auxvec.rs | 86 ++++++++++++++++++++++ .../crates/std_detect/src/detect/os/freebsd/mod.rs | 8 ++ .../std_detect/src/detect/os/freebsd/powerpc.rs | 27 +++++++ 4 files changed, 148 insertions(+) create mode 100644 library/stdarch/crates/std_detect/src/detect/os/freebsd/arm.rs create mode 100644 library/stdarch/crates/std_detect/src/detect/os/freebsd/auxvec.rs create mode 100644 library/stdarch/crates/std_detect/src/detect/os/freebsd/powerpc.rs (limited to 'library/stdarch/crates/std_detect/src') diff --git a/library/stdarch/crates/std_detect/src/detect/os/freebsd/arm.rs b/library/stdarch/crates/std_detect/src/detect/os/freebsd/arm.rs new file mode 100644 index 00000000000..e13847dcbd8 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/os/freebsd/arm.rs @@ -0,0 +1,27 @@ +//! Run-time feature detection for ARM on FreeBSD + +use crate::detect::{Feature, cache}; +use super::{auxvec}; + +/// Performs run-time feature detection. +#[inline] +pub fn check_for(x: Feature) -> bool { + cache::test(x as u32, detect_features) +} + +/// Try to read the features from the auxiliary vector +fn detect_features() -> cache::Initializer { + let mut value = cache::Initializer::default(); + let enable_feature = |value: &mut cache::Initializer, f, enable| { + if enable { + value.set(f as u32); + } + }; + + if let Ok(auxv) = auxvec::auxv() { + enable_feature(&mut value, Feature::neon, auxv.hwcap & 0x00001000 != 0); + enable_feature(&mut value, Feature::pmull, auxv.hwcap2 & 0x00000002 != 0); + return value; + } + value +} diff --git a/library/stdarch/crates/std_detect/src/detect/os/freebsd/auxvec.rs b/library/stdarch/crates/std_detect/src/detect/os/freebsd/auxvec.rs new file mode 100644 index 00000000000..fd2bd500bfa --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/os/freebsd/auxvec.rs @@ -0,0 +1,86 @@ +//! Parses ELF auxiliary vectors. +#![cfg_attr(any(target_arch = "arm", target_arch = "powerpc64"), allow(dead_code))] + +/// Key to access the CPU Hardware capabilities bitfield. 
+pub(crate) const AT_HWCAP: usize = 25; +/// Key to access the CPU Hardware capabilities 2 bitfield. +pub(crate) const AT_HWCAP2: usize = 26; + +/// Cache HWCAP bitfields of the ELF Auxiliary Vector. +/// +/// If an entry cannot be read all the bits in the bitfield are set to zero. +/// This should be interpreted as all the features being disabled. +#[derive(Debug, Copy, Clone)] +pub(crate) struct AuxVec { + pub hwcap: usize, + pub hwcap2: usize, +} + +/// ELF Auxiliary Vector +/// +/// The auxiliary vector is a memory region in a running ELF program's stack +/// composed of (key: usize, value: usize) pairs. +/// +/// The keys used in the aux vector are platform dependent. For FreeBSD, they are +/// defined in [sys/elf_common.h][elf_common_h]. The hardware capabilities of a given +/// CPU can be queried with the `AT_HWCAP` and `AT_HWCAP2` keys. +/// +/// Note that run-time feature detection is not invoked for features that can +/// be detected at compile-time. +/// +/// [elf_common.h]: https://svnweb.freebsd.org/base/release/12.0.0/sys/sys/elf_common.h?revision=341707 +pub(crate) fn auxv() -> Result { + if let Ok(hwcap) = archauxv(AT_HWCAP) { + if let Ok(hwcap2) = archauxv(AT_HWCAP2) { + if hwcap != 0 && hwcap2 != 0 { + return Ok(AuxVec { hwcap, hwcap2 }); + } + } + } + Err(()) +} + +/// Tries to read the `key` from the auxiliary vector. +fn archauxv(key: usize) -> Result { + use mem; + + #[derive (Copy, Clone)] + #[repr(C)] + pub struct Elf_Auxinfo { + pub a_type: usize, + pub a_un: unnamed, + } + #[derive (Copy, Clone)] + #[repr(C)] + pub union unnamed { + pub a_val: libc::c_long, + pub a_ptr: *mut libc::c_void, + pub a_fcn: Option ()>, + } + + let mut auxv: [Elf_Auxinfo; 27] = + [Elf_Auxinfo{a_type: 0, a_un: unnamed{a_val: 0,},}; 27]; + + let mut len: libc::c_uint = mem::size_of_val(&auxv) as libc::c_uint; + + unsafe { + let mut mib = [libc::CTL_KERN, libc::KERN_PROC, libc::KERN_PROC_AUXV, libc::getpid()]; + + let ret = libc::sysctl(mib.as_mut_ptr(), + mib.len() as u32, + &mut auxv as *mut _ as *mut _, + &mut len as *mut _ as *mut _, + 0 as *mut libc::c_void, + 0, + ); + + if ret != -1 { + for i in 0..auxv.len() { + if auxv[i].a_type == key { + return Ok(auxv[i].a_un.a_val as usize); + } + } + } + } + return Ok(0); +} diff --git a/library/stdarch/crates/std_detect/src/detect/os/freebsd/mod.rs b/library/stdarch/crates/std_detect/src/detect/os/freebsd/mod.rs index 952b4baf0fd..1a5338a3555 100644 --- a/library/stdarch/crates/std_detect/src/detect/os/freebsd/mod.rs +++ b/library/stdarch/crates/std_detect/src/detect/os/freebsd/mod.rs @@ -1,9 +1,17 @@ //! Run-time feature detection on FreeBSD +mod auxvec; + cfg_if! { if #[cfg(target_arch = "aarch64")] { mod aarch64; pub use self::aarch64::check_for; + } else if #[cfg(target_arch = "arm")] { + mod arm; + pub use self::arm::check_for; + } else if #[cfg(target_arch = "powerpc64")] { + mod powerpc; + pub use self::powerpc::check_for; } else { use crate::arch::detect::Feature; /// Performs run-time feature detection. diff --git a/library/stdarch/crates/std_detect/src/detect/os/freebsd/powerpc.rs b/library/stdarch/crates/std_detect/src/detect/os/freebsd/powerpc.rs new file mode 100644 index 00000000000..c7f761d4d60 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/os/freebsd/powerpc.rs @@ -0,0 +1,27 @@ +//! Run-time feature detection for PowerPC on FreeBSD. + +use crate::detect::{Feature, cache}; +use super::{auxvec}; + +/// Performs run-time feature detection. 
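For readability (an aside, not patch content), the hexadecimal masks tested by the ARM and PowerPC detectors in this commit correspond to well-known `AT_HWCAP`/`AT_HWCAP2` bit positions. The constant names below are local to this sketch rather than taken from FreeBSD's headers:

```rust
// Aside, not patch content: the magic masks from this commit's detectors,
// bound to descriptive, locally defined names.
const ARM_HWCAP_NEON: usize = 0x0000_1000; // AT_HWCAP bit 12: NEON/ASIMD
const ARM_HWCAP2_PMULL: usize = 0x0000_0002; // AT_HWCAP2 bit 1: PMULL
const PPC_HWCAP_ALTIVEC: usize = 0x1000_0000; // AT_HWCAP: AltiVec
const PPC_HWCAP_VSX: usize = 0x0000_0080; // AT_HWCAP: VSX
const PPC_HWCAP2_ARCH_2_07: usize = 0x8000_0000; // AT_HWCAP2: ISA 2.07 (POWER8)

// POWER8 is an AT_HWCAP2 bit, so it has to be tested against `hwcap2` rather
// than `hwcap` (compare the follow-up fix to the Linux detector further below).
fn has_power8(hwcap2: usize) -> bool {
    hwcap2 & PPC_HWCAP2_ARCH_2_07 != 0
}
```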
+#[inline] +pub fn check_for(x: Feature) -> bool { + cache::test(x as u32, detect_features) +} + +fn detect_features() -> cache::Initializer { + let mut value = cache::Initializer::default(); + let enable_feature = |value: &mut cache::Initializer, f, enable| { + if enable { + value.set(f as u32); + } + }; + + if let Ok(auxv) = auxvec::auxv() { + enable_feature(&mut value, Feature::altivec, auxv.hwcap & 0x10000000 != 0); + enable_feature(&mut value, Feature::vsx, auxv.hwcap & 0x00000080 != 0); + enable_feature(&mut value, Feature::power8, auxv.hwcap2 & 0x80000000 != 0); + return value; + } + value +} -- cgit 1.4.1-3-g733a5 From a2b98a167e263ca487c4eb76d1c19f1b69a09348 Mon Sep 17 00:00:00 2001 From: MikaelUrankar Date: Sat, 11 May 2019 15:23:35 +0200 Subject: Fix detection of power8 The power8 feature is defined in hwcap2 --- library/stdarch/crates/std_detect/src/detect/os/linux/powerpc.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'library/stdarch/crates/std_detect/src') diff --git a/library/stdarch/crates/std_detect/src/detect/os/linux/powerpc.rs b/library/stdarch/crates/std_detect/src/detect/os/linux/powerpc.rs index 0022a7db983..1c08a58443d 100644 --- a/library/stdarch/crates/std_detect/src/detect/os/linux/powerpc.rs +++ b/library/stdarch/crates/std_detect/src/detect/os/linux/powerpc.rs @@ -27,7 +27,7 @@ fn detect_features() -> cache::Initializer { // index of the bit to test like in ARM and Aarch64) enable_feature(&mut value, Feature::altivec, auxv.hwcap & 0x10000000 != 0); enable_feature(&mut value, Feature::vsx, auxv.hwcap & 0x00000080 != 0); - enable_feature(&mut value, Feature::power8, auxv.hwcap & 0x80000000 != 0); + enable_feature(&mut value, Feature::power8, auxv.hwcap2 & 0x80000000 != 0); return value; } -- cgit 1.4.1-3-g733a5 From 491b7c0c53d8d24db8062f1d1154795090894dae Mon Sep 17 00:00:00 2001 From: Tobias Kortkamp Date: Thu, 23 May 2019 03:43:54 +0000 Subject: Fix build of auxvec.rs on FreeBSD/powerpc64 ``` error[E0432]: unresolved import `mem` --> src/libstd/../stdsimd/crates/std_detect/src/detect/os/freebsd/auxvec.rs:45:9 | 45 | use mem; | ^^^ no `mem` external crate error: aborting due to previous error For more information about this error, try `rustc --explain E0432`. error: Could not compile `std`. ``` Tested by @pkubaj in https://reviews.freebsd.org/D20332 --- library/stdarch/crates/std_detect/src/detect/os/freebsd/auxvec.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'library/stdarch/crates/std_detect/src') diff --git a/library/stdarch/crates/std_detect/src/detect/os/freebsd/auxvec.rs b/library/stdarch/crates/std_detect/src/detect/os/freebsd/auxvec.rs index fd2bd500bfa..a2bac767601 100644 --- a/library/stdarch/crates/std_detect/src/detect/os/freebsd/auxvec.rs +++ b/library/stdarch/crates/std_detect/src/detect/os/freebsd/auxvec.rs @@ -42,7 +42,7 @@ pub(crate) fn auxv() -> Result { /// Tries to read the `key` from the auxiliary vector. fn archauxv(key: usize) -> Result { - use mem; + use crate::mem; #[derive (Copy, Clone)] #[repr(C)] -- cgit 1.4.1-3-g733a5 From 6369621e7981c38f86e5e1e63382805416aefb26 Mon Sep 17 00:00:00 2001 From: hygonsoc Date: Sat, 25 May 2019 21:36:53 +0800 Subject: add Hygon Dhyana CPU Vendor ID("HygonGenuine") checking As Hygon Dhyana originates from AMD technology and shares most of the architecture with AMD's family 17h, but with different CPU Vendor ID("HygonGenuine")/Family series number(Family 18h). for CPUID feature bits, Hygon Dhyana(family 18h) share the same definition with AMD family 17h. 
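For illustration (not part of this patch), the vendor ID that the check below compares against is the 12-byte string returned by CPUID leaf 0, assembled from EBX, EDX and ECX in that order; the helper names here are placeholders:

```rust
// Illustration only, not patch content: reading a CPUID vendor string such as
// "AuthenticAMD" or "HygonGenuine".
#[cfg(target_arch = "x86_64")]
fn vendor_id() -> [u8; 12] {
    use core::arch::x86_64::__cpuid;
    let leaf0 = unsafe { __cpuid(0) };
    let mut id = [0_u8; 12];
    id[0..4].copy_from_slice(&leaf0.ebx.to_le_bytes());
    id[4..8].copy_from_slice(&leaf0.edx.to_le_bytes());
    id[8..12].copy_from_slice(&leaf0.ecx.to_le_bytes());
    id
}

#[cfg(target_arch = "x86_64")]
fn treat_as_amd_family_17h() -> bool {
    // Hygon Dhyana reports "HygonGenuine" but uses AMD's family-17h feature bits.
    let id = vendor_id();
    &id == b"AuthenticAMD" || &id == b"HygonGenuine"
}
```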
AMD CPUID specification is https://www.amd.com/system/files/TechDocs/25481.pdf. Related Hygon kernel patch can be found on http://lkml.kernel.org/r/5ce86123a7b9dad925ac583d88d2f921040e859b.1538583282.git.puwen@hygon.cn --- library/stdarch/crates/std_detect/src/detect/os/x86.rs | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'library/stdarch/crates/std_detect/src') diff --git a/library/stdarch/crates/std_detect/src/detect/os/x86.rs b/library/stdarch/crates/std_detect/src/detect/os/x86.rs index e543d301c79..9257b8a4be6 100644 --- a/library/stdarch/crates/std_detect/src/detect/os/x86.rs +++ b/library/stdarch/crates/std_detect/src/detect/os/x86.rs @@ -234,8 +234,18 @@ fn detect_features() -> cache::Initializer { // The `is_x86_feature_detected!("lzcnt")` macro then // internally maps to Feature::abm. enable(extended_proc_info_ecx, 5, Feature::abm); - if vendor_id == *b"AuthenticAMD" { - // These features are only available on AMD CPUs: + // As Hygon Dhyana originates from AMD technology and shares most of the architecture with + // AMD's family 17h, but with different CPU Vendor ID("HygonGenuine")/Family series + // number(Family 18h). + // + // For CPUID feature bits, Hygon Dhyana(family 18h) share the same definition with AMD + // family 17h. + // + // Related AMD CPUID specification is https://www.amd.com/system/files/TechDocs/25481.pdf. + // Related Hygon kernel patch can be found on + // http://lkml.kernel.org/r/5ce86123a7b9dad925ac583d88d2f921040e859b.1538583282.git.puwen@hygon.cn + if vendor_id == *b"AuthenticAMD" || vendor_id == *b"HygonGenuine" { + // These features are available on AMD arch CPUs: enable(extended_proc_info_ecx, 6, Feature::sse4a); enable(extended_proc_info_ecx, 21, Feature::tbm); } -- cgit 1.4.1-3-g733a5 From 686b813f5d8ac504fb2f254731d0d681147d415e Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Mon, 8 Jul 2019 23:21:37 +0200 Subject: Update repo name --- library/stdarch/.travis.yml | 16 +- library/stdarch/CONTRIBUTING.md | 26 +- library/stdarch/Cargo.toml | 2 +- library/stdarch/QUESTIONS.md | 4 +- library/stdarch/README.md | 8 +- library/stdarch/ci/run-docker.sh | 12 +- library/stdarch/ci/run.sh | 14 +- .../stdarch/crates/assert-instr-macro/src/lib.rs | 10 +- library/stdarch/crates/core_arch/Cargo.toml | 14 +- library/stdarch/crates/core_arch/README.md | 22 +- .../stdarch/crates/core_arch/src/aarch64/crc.rs | 4 +- .../stdarch/crates/core_arch/src/aarch64/crypto.rs | 4 +- .../stdarch/crates/core_arch/src/aarch64/mod.rs | 2 +- .../stdarch/crates/core_arch/src/aarch64/neon.rs | 4 +- library/stdarch/crates/core_arch/src/aarch64/v8.rs | 2 +- library/stdarch/crates/core_arch/src/acle/dsp.rs | 4 +- .../crates/core_arch/src/acle/registers/aarch32.rs | 2 +- .../stdarch/crates/core_arch/src/acle/simd32.rs | 4 +- .../stdarch/crates/core_arch/src/arm/armclang.rs | 2 +- library/stdarch/crates/core_arch/src/arm/mod.rs | 2 +- library/stdarch/crates/core_arch/src/arm/neon.rs | 4 +- .../crates/core_arch/src/arm/table_lookup_tests.rs | 2 +- library/stdarch/crates/core_arch/src/arm/v6.rs | 2 +- library/stdarch/crates/core_arch/src/arm/v7.rs | 8 +- library/stdarch/crates/core_arch/src/lib.rs | 3 +- library/stdarch/crates/core_arch/src/mips/mod.rs | 2 +- library/stdarch/crates/core_arch/src/mips/msa.rs | 4 +- library/stdarch/crates/core_arch/src/mod.rs | 2 +- .../crates/core_arch/src/powerpc/altivec.rs | 4 +- .../stdarch/crates/core_arch/src/powerpc/mod.rs | 2 +- .../stdarch/crates/core_arch/src/powerpc/vsx.rs | 4 +- 
.../stdarch/crates/core_arch/src/wasm32/atomic.rs | 2 +- .../stdarch/crates/core_arch/src/wasm32/memory.rs | 2 +- library/stdarch/crates/core_arch/src/wasm32/mod.rs | 2 +- .../stdarch/crates/core_arch/src/wasm32/simd128.rs | 2 +- library/stdarch/crates/core_arch/src/x86/abm.rs | 4 +- library/stdarch/crates/core_arch/src/x86/adx.rs | 4 +- library/stdarch/crates/core_arch/src/x86/aes.rs | 4 +- library/stdarch/crates/core_arch/src/x86/avx.rs | 8 +- library/stdarch/crates/core_arch/src/x86/avx2.rs | 4 +- .../stdarch/crates/core_arch/src/x86/avx512f.rs | 4 +- .../stdarch/crates/core_arch/src/x86/avx512ifma.rs | 4 +- library/stdarch/crates/core_arch/src/x86/bmi1.rs | 4 +- library/stdarch/crates/core_arch/src/x86/bmi2.rs | 4 +- library/stdarch/crates/core_arch/src/x86/bswap.rs | 2 +- library/stdarch/crates/core_arch/src/x86/bt.rs | 2 +- library/stdarch/crates/core_arch/src/x86/cpuid.rs | 2 +- library/stdarch/crates/core_arch/src/x86/f16c.rs | 4 +- library/stdarch/crates/core_arch/src/x86/fma.rs | 4 +- library/stdarch/crates/core_arch/src/x86/fxsr.rs | 4 +- library/stdarch/crates/core_arch/src/x86/mmx.rs | 4 +- library/stdarch/crates/core_arch/src/x86/mod.rs | 10 +- .../stdarch/crates/core_arch/src/x86/pclmulqdq.rs | 4 +- library/stdarch/crates/core_arch/src/x86/rdrand.rs | 2 +- library/stdarch/crates/core_arch/src/x86/rdtsc.rs | 4 +- library/stdarch/crates/core_arch/src/x86/rtm.rs | 4 +- library/stdarch/crates/core_arch/src/x86/sha.rs | 4 +- library/stdarch/crates/core_arch/src/x86/sse.rs | 6 +- library/stdarch/crates/core_arch/src/x86/sse2.rs | 4 +- library/stdarch/crates/core_arch/src/x86/sse3.rs | 4 +- library/stdarch/crates/core_arch/src/x86/sse41.rs | 4 +- library/stdarch/crates/core_arch/src/x86/sse42.rs | 4 +- library/stdarch/crates/core_arch/src/x86/sse4a.rs | 4 +- library/stdarch/crates/core_arch/src/x86/ssse3.rs | 4 +- library/stdarch/crates/core_arch/src/x86/tbm.rs | 4 +- library/stdarch/crates/core_arch/src/x86/xsave.rs | 12 +- library/stdarch/crates/core_arch/src/x86_64/abm.rs | 4 +- library/stdarch/crates/core_arch/src/x86_64/adx.rs | 4 +- library/stdarch/crates/core_arch/src/x86_64/avx.rs | 2 +- .../stdarch/crates/core_arch/src/x86_64/avx2.rs | 2 +- library/stdarch/crates/core_arch/src/x86_64/bmi.rs | 4 +- .../stdarch/crates/core_arch/src/x86_64/bmi2.rs | 4 +- .../stdarch/crates/core_arch/src/x86_64/bswap.rs | 2 +- library/stdarch/crates/core_arch/src/x86_64/bt.rs | 2 +- .../crates/core_arch/src/x86_64/cmpxchg16b.rs | 2 +- .../stdarch/crates/core_arch/src/x86_64/fxsr.rs | 4 +- .../stdarch/crates/core_arch/src/x86_64/rdrand.rs | 2 +- library/stdarch/crates/core_arch/src/x86_64/sse.rs | 4 +- .../stdarch/crates/core_arch/src/x86_64/sse2.rs | 4 +- .../stdarch/crates/core_arch/src/x86_64/sse41.rs | 4 +- .../stdarch/crates/core_arch/src/x86_64/sse42.rs | 4 +- .../stdarch/crates/core_arch/src/x86_64/xsave.rs | 8 +- .../crates/core_arch/tests/cpu-detection.rs | 1 - library/stdarch/crates/simd-test-macro/src/lib.rs | 2 +- library/stdarch/crates/std_detect/Cargo.toml | 12 +- library/stdarch/crates/std_detect/README.md | 6 +- library/stdarch/crates/std_detect/src/lib.rs | 1 - .../crates/std_detect/tests/cpu-detection.rs | 1 - .../std_detect/tests/macro_trailing_commas.rs | 1 - library/stdarch/crates/stdarch-test/Cargo.toml | 20 + .../stdarch/crates/stdarch-test/src/disassembly.rs | 182 + library/stdarch/crates/stdarch-test/src/lib.rs | 178 + library/stdarch/crates/stdarch-test/src/wasm.rs | 88 + .../stdarch/crates/stdarch-verify/.gitattributes | 1 + 
library/stdarch/crates/stdarch-verify/Cargo.toml | 19 + .../crates/stdarch-verify/arm-intrinsics.html | 93399 ++++++++++++ library/stdarch/crates/stdarch-verify/build.rs | 28 + library/stdarch/crates/stdarch-verify/mips-msa.h | 707 + library/stdarch/crates/stdarch-verify/src/lib.rs | 387 + library/stdarch/crates/stdarch-verify/tests/arm.rs | 592 + .../stdarch/crates/stdarch-verify/tests/mips.rs | 335 + .../crates/stdarch-verify/tests/x86-intel.rs | 537 + .../stdarch/crates/stdarch-verify/x86-intel.xml | 134861 ++++++++++++++++++ library/stdarch/crates/stdsimd-test/Cargo.toml | 20 - .../stdarch/crates/stdsimd-test/src/disassembly.rs | 182 - library/stdarch/crates/stdsimd-test/src/lib.rs | 178 - library/stdarch/crates/stdsimd-test/src/wasm.rs | 88 - .../stdarch/crates/stdsimd-verify/.gitattributes | 1 - library/stdarch/crates/stdsimd-verify/Cargo.toml | 19 - .../crates/stdsimd-verify/arm-intrinsics.html | 93399 ------------ library/stdarch/crates/stdsimd-verify/build.rs | 28 - library/stdarch/crates/stdsimd-verify/mips-msa.h | 707 - library/stdarch/crates/stdsimd-verify/src/lib.rs | 387 - library/stdarch/crates/stdsimd-verify/tests/arm.rs | 592 - .../stdarch/crates/stdsimd-verify/tests/mips.rs | 335 - .../crates/stdsimd-verify/tests/x86-intel.rs | 537 - .../stdarch/crates/stdsimd-verify/x86-intel.xml | 134861 ------------------ library/stdarch/examples/Cargo.toml | 4 +- library/stdarch/examples/hex.rs | 4 +- library/stdarch/vendor.yml | 4 +- 120 files changed, 231547 insertions(+), 231552 deletions(-) create mode 100644 library/stdarch/crates/stdarch-test/Cargo.toml create mode 100644 library/stdarch/crates/stdarch-test/src/disassembly.rs create mode 100644 library/stdarch/crates/stdarch-test/src/lib.rs create mode 100644 library/stdarch/crates/stdarch-test/src/wasm.rs create mode 100644 library/stdarch/crates/stdarch-verify/.gitattributes create mode 100644 library/stdarch/crates/stdarch-verify/Cargo.toml create mode 100644 library/stdarch/crates/stdarch-verify/arm-intrinsics.html create mode 100644 library/stdarch/crates/stdarch-verify/build.rs create mode 100644 library/stdarch/crates/stdarch-verify/mips-msa.h create mode 100644 library/stdarch/crates/stdarch-verify/src/lib.rs create mode 100644 library/stdarch/crates/stdarch-verify/tests/arm.rs create mode 100644 library/stdarch/crates/stdarch-verify/tests/mips.rs create mode 100644 library/stdarch/crates/stdarch-verify/tests/x86-intel.rs create mode 100644 library/stdarch/crates/stdarch-verify/x86-intel.xml delete mode 100644 library/stdarch/crates/stdsimd-test/Cargo.toml delete mode 100644 library/stdarch/crates/stdsimd-test/src/disassembly.rs delete mode 100644 library/stdarch/crates/stdsimd-test/src/lib.rs delete mode 100644 library/stdarch/crates/stdsimd-test/src/wasm.rs delete mode 100644 library/stdarch/crates/stdsimd-verify/.gitattributes delete mode 100644 library/stdarch/crates/stdsimd-verify/Cargo.toml delete mode 100644 library/stdarch/crates/stdsimd-verify/arm-intrinsics.html delete mode 100644 library/stdarch/crates/stdsimd-verify/build.rs delete mode 100644 library/stdarch/crates/stdsimd-verify/mips-msa.h delete mode 100644 library/stdarch/crates/stdsimd-verify/src/lib.rs delete mode 100644 library/stdarch/crates/stdsimd-verify/tests/arm.rs delete mode 100644 library/stdarch/crates/stdsimd-verify/tests/mips.rs delete mode 100644 library/stdarch/crates/stdsimd-verify/tests/x86-intel.rs delete mode 100644 library/stdarch/crates/stdsimd-verify/x86-intel.xml (limited to 'library/stdarch/crates/std_detect/src') diff --git 
a/library/stdarch/.travis.yml b/library/stdarch/.travis.yml index 171771b0735..93ad469e817 100644 --- a/library/stdarch/.travis.yml +++ b/library/stdarch/.travis.yml @@ -13,14 +13,14 @@ matrix: env: TARGET=x86_64-unknown-linux-gnu install: true - name: "x86_64-unknown-linux-gnu-emulated (runs all assert_instr tests)" - env: TARGET=x86_64-unknown-linux-gnu-emulated STDSIMD_TEST_EVERYTHING=1 RUSTFLAGS="--cfg stdsimd_intel_sde" + env: TARGET=x86_64-unknown-linux-gnu-emulated STDARCH_TEST_EVERYTHING=1 RUSTFLAGS="--cfg stdarch_intel_sde" install: true - name: "x86_64-linux-android - no assert_instr" - env: TARGET=x86_64-linux-android STDSIMD_DISABLE_ASSERT_INSTR=1 + env: TARGET=x86_64-linux-android STDARCH_DISABLE_ASSERT_INSTR=1 - name: "arm-unknown-linux-gnueabihf" env: TARGET=arm-unknown-linux-gnueabihf - name: "arm-linux-androideabi - no assert_instr" - env: TARGET=arm-linux-androideabi STDSIMD_DISABLE_ASSERT_INSTR=1 + env: TARGET=arm-linux-androideabi STDARCH_DISABLE_ASSERT_INSTR=1 - name: "armv7-unknown-linux-gnueabihf - NEON" env: TARGET=armv7-unknown-linux-gnueabihf RUSTFLAGS="-C target-feature=+neon" - name: "aarch64-unknown-linux-gnu" @@ -34,13 +34,13 @@ matrix: - name: "mips64el-unknown-linux-gnuabi64 - build-only" env: TARGET=mips64el-unknown-linux-gnuabi64 NORUN=1 - name: "aarch64-linux-android - no assert_instr" - env: TARGET=aarch64-linux-android STDSIMD_DISABLE_ASSERT_INSTR=1 + env: TARGET=aarch64-linux-android STDARCH_DISABLE_ASSERT_INSTR=1 - name: "powerpc-unknown-linux-gnu - no assert_instr" - env: TARGET=powerpc-unknown-linux-gnu STDSIMD_DISABLE_ASSERT_INSTR=1 + env: TARGET=powerpc-unknown-linux-gnu STDARCH_DISABLE_ASSERT_INSTR=1 - name: "powerpc64-unknown-linux-gnu - no assert_instr" - env: TARGET=powerpc64-unknown-linux-gnu STDSIMD_DISABLE_ASSERT_INSTR=1 + env: TARGET=powerpc64-unknown-linux-gnu STDARCH_DISABLE_ASSERT_INSTR=1 - name: "powerpc64le-unknown-linux-gnu - no assert_instr" - env: TARGET=powerpc64le-unknown-linux-gnu STDSIMD_DISABLE_ASSERT_INSTR=1 + env: TARGET=powerpc64le-unknown-linux-gnu STDARCH_DISABLE_ASSERT_INSTR=1 - name: "s390x-unknown-linux-gnu" env: TARGET=s390x-unknown-linux-gnu - name: "i686-apple-darwin" @@ -90,7 +90,7 @@ matrix: install: true script: ci/dox.sh - name: "Automatic verification: x86 / x86_64 / arm / aarch64 / mips*" - script: cargo test --manifest-path crates/stdsimd-verify/Cargo.toml + script: cargo test --manifest-path crates/stdarch-verify/Cargo.toml install: true - name: "rustfmt" install: true diff --git a/library/stdarch/CONTRIBUTING.md b/library/stdarch/CONTRIBUTING.md index 47a3d6ba665..85b7bb474fc 100644 --- a/library/stdarch/CONTRIBUTING.md +++ b/library/stdarch/CONTRIBUTING.md @@ -1,11 +1,11 @@ -# Contributing to stdsimd +# Contributing to stdarch -The `stdsimd` crate is more than willing to accept contributions! First you'll +The `stdarch` crate is more than willing to accept contributions! First you'll probably want to check out the repository and make sure that tests pass for you: ``` -$ git clone https://github.com/rust-lang-nursery/stdsimd -$ cd stdsimd +$ git clone https://github.com/rust-lang/stdarch +$ cd stdarch $ cargo +nightly test ``` @@ -29,7 +29,7 @@ around! Feel free to ping either @BurntSushi or @alexcrichton with questions. 
[gitter]: https://gitter.im/rust-impl-period/WG-libs-simd -# How to write examples for stdsimd intrinsics +# How to write examples for stdarch intrinsics There are a few features that must be enabled for the given intrinsic to work properly and the example must only be run by `cargo test --doc` when the feature @@ -44,9 +44,9 @@ to ensure your example works as expected. /// # // We need target_feature for the intrinsic to work /// # #![feature(target_feature)] /// # -/// # // rustdoc by default uses `extern crate stdsimd`, but we need the +/// # // rustdoc by default uses `extern crate stdarch`, but we need the /// # // `#[macro_use]` -/// # #[macro_use] extern crate stdsimd; +/// # #[macro_use] extern crate stdarch; /// # /// # // The real main function /// # fn main() { @@ -69,12 +69,12 @@ to ensure your example works as expected. If some of the above syntax does not look familiar, the [Documentation as tests] section of the [Rust Book] describes the `rustdoc` syntax quite well. As always, feel free to [join us on gitter][gitter] and ask us if you hit any snags, and thank you for helping -to improve the documentation of `stdsimd`! +to improve the documentation of `stdarch`! -[new]: https://github.com/rust-lang-nursery/stdsimd/issues/new -[issues]: https://github.com/rust-lang-nursery/stdsimd/issues -[help]: https://github.com/rust-lang-nursery/stdsimd/issues?q=is%3Aissue+is%3Aopen+label%3A%22help+wanted%22 -[impl]: https://github.com/rust-lang-nursery/stdsimd/issues?q=is%3Aissue+is%3Aopen+label%3Aimpl-period -[vendor]: https://github.com/rust-lang-nursery/stdsimd/issues/40 +[new]: https://github.com/rust-lang/stdarch/issues/new +[issues]: https://github.com/rust-lang/stdarch/issues +[help]: https://github.com/rust-lang/stdarch/issues?q=is%3Aissue+is%3Aopen+label%3A%22help+wanted%22 +[impl]: https://github.com/rust-lang/stdarch/issues?q=is%3Aissue+is%3Aopen+label%3Aimpl-period +[vendor]: https://github.com/rust-lang/stdarch/issues/40 [Documentation as tests]: https://doc.rust-lang.org/book/first-edition/documentation.html#documentation-as-tests [Rust Book]: https://doc.rust-lang.org/book/first-edition diff --git a/library/stdarch/Cargo.toml b/library/stdarch/Cargo.toml index e97bf7f3632..7b4c5ead8a8 100644 --- a/library/stdarch/Cargo.toml +++ b/library/stdarch/Cargo.toml @@ -1,6 +1,6 @@ [workspace] members = [ - "crates/stdsimd-verify", + "crates/stdarch-verify", "crates/core_arch", "crates/std_detect", "examples/" diff --git a/library/stdarch/QUESTIONS.md b/library/stdarch/QUESTIONS.md index 3c6492556ca..bc04ea051ca 100644 --- a/library/stdarch/QUESTIONS.md +++ b/library/stdarch/QUESTIONS.md @@ -2,10 +2,10 @@ This program emits `psllw` instead of `pslliw` instruction. `pslliw` should be usable here since `imm8` is a constant: ```rust -extern crate stdsimd; +extern crate stdarch; use std::env; -use stdsimd as s; +use stdarch as s; fn main() { let imm8: i32 = env::args().nth(1).unwrap().parse().unwrap(); diff --git a/library/stdarch/README.md b/library/stdarch/README.md index 660295b0992..699d076a386 100644 --- a/library/stdarch/README.md +++ b/library/stdarch/README.md @@ -1,4 +1,4 @@ -stdsimd - Rust's standard library SIMD components +stdarch - Rust's standard library SIMD components ======= [![Travis-CI Status]][travis] [![Appveyor Status]][appveyor] @@ -29,9 +29,9 @@ To do a release of the `core_arch` and `std_detect` crates, https://github.com/rust-lang/cargo/issues/4242), * publish the crates. 
-[travis]: https://travis-ci.com/rust-lang-nursery/stdsimd -[Travis-CI Status]: https://travis-ci.com/rust-lang-nursery/stdsimd.svg?branch=master -[appveyor]: https://ci.appveyor.com/project/rust-lang-libs/stdsimd/branch/master +[travis]: https://travis-ci.com/rust-lang/stdarch +[Travis-CI Status]: https://travis-ci.com/rust-lang/stdarch.svg?branch=master +[appveyor]: https://ci.appveyor.com/project/rust-lang-libs/stdarch/branch/master [Appveyor Status]: https://ci.appveyor.com/api/projects/status/ix74qhmilpibn00x/branch/master?svg=true [core_arch_crate_badge]: https://img.shields.io/crates/v/core_arch.svg [core_arch_crate_link]: https://crates.io/crates/core_arch diff --git a/library/stdarch/ci/run-docker.sh b/library/stdarch/ci/run-docker.sh index 3db8e49ad90..48989f225d8 100755 --- a/library/stdarch/ci/run-docker.sh +++ b/library/stdarch/ci/run-docker.sh @@ -8,7 +8,7 @@ set -ex run() { target=$(echo "${1}" | sed 's/-emulated//') echo "Building docker container for TARGET=${1}" - docker build -t stdsimd -f "ci/docker/${1}/Dockerfile" ci/ + docker build -t stdarch -f "ci/docker/${1}/Dockerfile" ci/ mkdir -p target echo "Running docker" # shellcheck disable=SC2016 @@ -20,18 +20,18 @@ run() { --env CARGO_HOME=/cargo-h \ --volume "$(rustc --print sysroot)":/rust:ro \ --env TARGET="${target}" \ - --env STDSIMD_TEST_EVERYTHING \ - --env STDSIMD_ASSERT_INSTR_IGNORE \ - --env STDSIMD_DISABLE_ASSERT_INSTR \ + --env STDARCH_TEST_EVERYTHING \ + --env STDARCH_ASSERT_INSTR_IGNORE \ + --env STDARCH_DISABLE_ASSERT_INSTR \ --env NOSTD \ --env NORUN \ --env RUSTFLAGS \ - --env STDSIMD_TEST_NORUN \ + --env STDARCH_TEST_NORUN \ --volume "$(pwd)":/checkout:ro \ --volume "$(pwd)"/target:/checkout/target \ --workdir /checkout \ --privileged \ - stdsimd \ + stdarch \ bash \ -c 'PATH=/rust/bin:$PATH exec ci/run.sh' } diff --git a/library/stdarch/ci/run.sh b/library/stdarch/ci/run.sh index bc60edd5b19..4fd8ba0ec83 100755 --- a/library/stdarch/ci/run.sh +++ b/library/stdarch/ci/run.sh @@ -10,7 +10,7 @@ set -ex #export RUST_TEST_NOCAPTURE=1 #export RUST_TEST_THREADS=1 -RUSTFLAGS="$RUSTFLAGS --cfg stdsimd_strict" +RUSTFLAGS="$RUSTFLAGS --cfg stdarch_strict" case ${TARGET} in # On 32-bit use a static relocation model which avoids some extra @@ -33,8 +33,8 @@ esac echo "RUSTFLAGS=${RUSTFLAGS}" echo "FEATURES=${FEATURES}" echo "OBJDUMP=${OBJDUMP}" -echo "STDSIMD_DISABLE_ASSERT_INSTR=${STDSIMD_DISABLE_ASSERT_INSTR}" -echo "STDSIMD_TEST_EVERYTHING=${STDSIMD_TEST_EVERYTHING}" +echo "STDARCH_DISABLE_ASSERT_INSTR=${STDARCH_DISABLE_ASSERT_INSTR}" +echo "STDARCH_TEST_EVERYTHING=${STDARCH_TEST_EVERYTHING}" cargo_test() { cmd="cargo" @@ -49,7 +49,7 @@ cargo_test() { CORE_ARCH="--manifest-path=crates/core_arch/Cargo.toml" STD_DETECT="--manifest-path=crates/std_detect/Cargo.toml" -STDSIMD_EXAMPLES="--manifest-path=examples/Cargo.toml" +STDARCH_EXAMPLES="--manifest-path=examples/Cargo.toml" cargo_test "${CORE_ARCH} --release" if [ "$NOSTD" != "1" ]; then @@ -61,14 +61,14 @@ if [ "$NOSTD" != "1" ]; then cargo_test "${STD_DETECT} --no-default-features --features=std_detect_dlsym_getauxval" cargo_test "${STD_DETECT} --no-default-features --features=std_detect_dlsym_getauxval,std_detect_file_io" - cargo_test "${STDSIMD_EXAMPLES}" - cargo_test "${STDSIMD_EXAMPLES} --release" + cargo_test "${STDARCH_EXAMPLES}" + cargo_test "${STDARCH_EXAMPLES} --release" fi # Test targets compiled with extra features. 
case ${TARGET} in x86*) - export STDSIMD_DISABLE_ASSERT_INSTR=1 + export STDARCH_DISABLE_ASSERT_INSTR=1 export RUSTFLAGS="${RUSTFLAGS} -C target-feature=+avx" cargo_test "--release" ;; diff --git a/library/stdarch/crates/assert-instr-macro/src/lib.rs b/library/stdarch/crates/assert-instr-macro/src/lib.rs index 61986411211..0e739605846 100644 --- a/library/stdarch/crates/assert-instr-macro/src/lib.rs +++ b/library/stdarch/crates/assert-instr-macro/src/lib.rs @@ -1,6 +1,6 @@ //! Implementation of the `#[assert_instr]` macro //! -//! This macro is used when testing the `stdsimd` crate and is used to generate +//! This macro is used when testing the `stdarch` crate and is used to generate //! test cases to assert that functions do indeed contain the instructions that //! we're expecting them to contain. //! @@ -41,7 +41,7 @@ pub fn assert_instr( // Disable assert_instr for x86 targets compiled with avx enabled, which // causes LLVM to generate different intrinsics that the ones we are // testing for. - let disable_assert_instr = std::env::var("STDSIMD_DISABLE_ASSERT_INSTR").is_ok(); + let disable_assert_instr = std::env::var("STDARCH_DISABLE_ASSERT_INSTR").is_ok(); // If instruction tests are disabled avoid emitting this shim at all, just // return the original item without our attribute. @@ -57,7 +57,7 @@ pub fn assert_instr( let assert_name = syn::Ident::new(&format!("assert_{}_{}", name, instr_str), name.span()); // These name has to be unique enough for us to find it in the disassembly later on: let shim_name = syn::Ident::new( - &format!("stdsimd_test_shim_{}_{}", name, instr_str), + &format!("stdarch_test_shim_{}_{}", name, instr_str), name.span(), ); let mut inputs = Vec::new(); @@ -123,7 +123,7 @@ pub fn assert_instr( // generate some code that's hopefully very tight in terms of // codegen but is otherwise unique to prevent code from being // folded. - ::stdsimd_test::_DONT_DEDUP.store( + ::stdarch_test::_DONT_DEDUP.store( std::mem::transmute(#shim_name_str.as_bytes().as_ptr()), std::sync::atomic::Ordering::Relaxed, ); @@ -142,7 +142,7 @@ pub fn assert_instr( // code: unsafe { asm!("" : : "r"(#shim_name as usize) : "memory" : "volatile") }; - ::stdsimd_test::assert(#shim_name as usize, + ::stdarch_test::assert(#shim_name as usize, stringify!(#shim_name), #instr); } diff --git a/library/stdarch/crates/core_arch/Cargo.toml b/library/stdarch/crates/core_arch/Cargo.toml index e3fe73c4692..c67e3e15c9c 100644 --- a/library/stdarch/crates/core_arch/Cargo.toml +++ b/library/stdarch/crates/core_arch/Cargo.toml @@ -8,8 +8,8 @@ authors = [ ] description = "`core::arch` - Rust's core library architecture-specific intrinsics." 
documentation = "https://docs.rs/core_arch" -homepage = "https://github.com/rust-lang-nursery/stdsimd" -repository = "https://github.com/rust-lang-nursery/stdsimd" +homepage = "https://github.com/rust-lang/stdarch" +repository = "https://github.com/rust-lang/stdarch" readme = "README.md" keywords = ["core", "simd", "arch", "intrinsics"] categories = ["hardware-support", "no-std"] @@ -18,14 +18,14 @@ build = "build.rs" edition = "2018" [badges] -travis-ci = { repository = "rust-lang-nursery/stdsimd" } -appveyor = { repository = "rust-lang-nursery/stdsimd" } -is-it-maintained-issue-resolution = { repository = "rust-lang-nursery/stdsimd" } -is-it-maintained-open-issues = { repository = "rust-lang-nursery/stdsimd" } +travis-ci = { repository = "rust-lang/stdarch" } +appveyor = { repository = "rust-lang/stdarch" } +is-it-maintained-issue-resolution = { repository = "rust-lang/stdarch" } +is-it-maintained-open-issues = { repository = "rust-lang/stdarch" } maintenance = { status = "experimental" } [dev-dependencies] -stdsimd-test = { version = "0.*", path = "../stdsimd-test" } +stdarch-test = { version = "0.*", path = "../stdarch-test" } std_detect = { version = "0.*", path = "../std_detect" } [target.wasm32-unknown-unknown.dev-dependencies] diff --git a/library/stdarch/crates/core_arch/README.md b/library/stdarch/crates/core_arch/README.md index f8d10b4d620..89f03b7527d 100644 --- a/library/stdarch/crates/core_arch/README.md +++ b/library/stdarch/crates/core_arch/README.md @@ -39,14 +39,14 @@ are: * [How to get started][contrib] * [How to help implement intrinsics][help-implement] -[contrib]: https://github.com/rust-lang-nursery/stdsimd/blob/master/CONTRIBUTING.md -[help-implement]: https://github.com/rust-lang-nursery/stdsimd/issues/40 -[i686]: https://rust-lang-nursery.github.io/stdsimd/i686/core_arch/ -[x86_64]: https://rust-lang-nursery.github.io/stdsimd/x86_64/core_arch/ -[arm]: https://rust-lang-nursery.github.io/stdsimd/arm/core_arch/ -[aarch64]: https://rust-lang-nursery.github.io/stdsimd/aarch64/core_arch/ -[powerpc]: https://rust-lang-nursery.github.io/stdsimd/powerpc/core_arch/ -[powerpc64]: https://rust-lang-nursery.github.io/stdsimd/powerpc64/core_arch/ +[contrib]: https://github.com/rust-lang/stdarch/blob/master/CONTRIBUTING.md +[help-implement]: https://github.com/rust-lang/stdarch/issues/40 +[i686]: https://rust-lang.github.io/stdarch/i686/core_arch/ +[x86_64]: https://rust-lang.github.io/stdarch/x86_64/core_arch/ +[arm]: https://rust-lang.github.io/stdarch/arm/core_arch/ +[aarch64]: https://rust-lang.github.io/stdarch/aarch64/core_arch/ +[powerpc]: https://rust-lang.github.io/stdarch/powerpc/core_arch/ +[powerpc64]: https://rust-lang.github.io/stdarch/powerpc64/core_arch/ # License @@ -62,9 +62,9 @@ Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in `core_arch` by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions. 
-[travis]: https://travis-ci.com/rust-lang-nursery/stdsimd -[Travis-CI Status]: https://travis-ci.com/rust-lang-nursery/stdsimd.svg?branch=master -[appveyor]: https://ci.appveyor.com/project/rust-lang-libs/stdsimd/branch/master +[travis]: https://travis-ci.com/rust-lang/stdarch +[Travis-CI Status]: https://travis-ci.com/rust-lang/stdarch.svg?branch=master +[appveyor]: https://ci.appveyor.com/project/rust-lang-libs/stdarch/branch/master [Appveyor Status]: https://ci.appveyor.com/api/projects/status/ix74qhmilpibn00x/branch/master?svg=true [core_arch_crate_badge]: https://img.shields.io/crates/v/core_arch.svg [core_arch_crate_link]: https://crates.io/crates/core_arch diff --git a/library/stdarch/crates/core_arch/src/aarch64/crc.rs b/library/stdarch/crates/core_arch/src/aarch64/crc.rs index 18307d295d4..278a7856271 100644 --- a/library/stdarch/crates/core_arch/src/aarch64/crc.rs +++ b/library/stdarch/crates/core_arch/src/aarch64/crc.rs @@ -19,7 +19,7 @@ extern "C" { } #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; /// CRC32 single round checksum for bytes (8 bits). #[inline] @@ -89,7 +89,7 @@ pub unsafe fn __crc32cd(crc: u32, data: u64) -> u32 { mod tests { use crate::core_arch::{aarch64::*, simd::*}; use std::mem; - use stdsimd_test::simd_test; + use stdarch_test::simd_test; #[simd_test(enable = "crc")] unsafe fn test_crc32b() { diff --git a/library/stdarch/crates/core_arch/src/aarch64/crypto.rs b/library/stdarch/crates/core_arch/src/aarch64/crypto.rs index 5d46070b233..91269d8052f 100644 --- a/library/stdarch/crates/core_arch/src/aarch64/crypto.rs +++ b/library/stdarch/crates/core_arch/src/aarch64/crypto.rs @@ -35,7 +35,7 @@ extern "C" { } #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; /// AES single round encryption. #[inline] @@ -165,7 +165,7 @@ pub unsafe fn vsha256su1q_u32( mod tests { use crate::core_arch::{aarch64::*, simd::*}; use std::mem; - use stdsimd_test::simd_test; + use stdarch_test::simd_test; #[simd_test(enable = "crypto")] unsafe fn test_vaeseq_u8() { diff --git a/library/stdarch/crates/core_arch/src/aarch64/mod.rs b/library/stdarch/crates/core_arch/src/aarch64/mod.rs index 0f2f9da11a3..e33dc7eaf5b 100644 --- a/library/stdarch/crates/core_arch/src/aarch64/mod.rs +++ b/library/stdarch/crates/core_arch/src/aarch64/mod.rs @@ -21,7 +21,7 @@ pub use self::crc::*; pub use super::acle::*; #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; /// Generates the trap instruction `BRK 1` #[cfg_attr(test, assert_instr(brk))] diff --git a/library/stdarch/crates/core_arch/src/aarch64/neon.rs b/library/stdarch/crates/core_arch/src/aarch64/neon.rs index b96f7ac3cfe..46af4567b3b 100644 --- a/library/stdarch/crates/core_arch/src/aarch64/neon.rs +++ b/library/stdarch/crates/core_arch/src/aarch64/neon.rs @@ -9,7 +9,7 @@ use crate::{ mem::{transmute, zeroed}, }; #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; types! { /// ARM-specific 64-bit wide vector of one packed `f64`. 
@@ -1546,7 +1546,7 @@ pub unsafe fn vqtbx4q_p8(a: poly8x16_t, t: poly8x16x4_t, idx: uint8x16_t) -> pol mod tests { use crate::core_arch::{aarch64::*, simd::*}; use std::mem::transmute; - use stdsimd_test::simd_test; + use stdarch_test::simd_test; #[simd_test(enable = "neon")] unsafe fn test_vadd_f64() { diff --git a/library/stdarch/crates/core_arch/src/aarch64/v8.rs b/library/stdarch/crates/core_arch/src/aarch64/v8.rs index f44f43d34f4..778721c6851 100644 --- a/library/stdarch/crates/core_arch/src/aarch64/v8.rs +++ b/library/stdarch/crates/core_arch/src/aarch64/v8.rs @@ -6,7 +6,7 @@ //! ddi0487a.k_10775/index.html #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; /// Reverse the order of the bytes. #[inline] diff --git a/library/stdarch/crates/core_arch/src/acle/dsp.rs b/library/stdarch/crates/core_arch/src/acle/dsp.rs index c87363ea030..03cc082697a 100644 --- a/library/stdarch/crates/core_arch/src/acle/dsp.rs +++ b/library/stdarch/crates/core_arch/src/acle/dsp.rs @@ -21,7 +21,7 @@ //! - \[x\] __smlawt #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; use crate::mem::transmute; @@ -241,7 +241,7 @@ mod tests { simd::{i16x2, i8x4, u8x4}, }; use std::{i32, mem::transmute}; - use stdsimd_test::simd_test; + use stdarch_test::simd_test; #[test] fn smulbb() { diff --git a/library/stdarch/crates/core_arch/src/acle/registers/aarch32.rs b/library/stdarch/crates/core_arch/src/acle/registers/aarch32.rs index 5cc66308be2..e0b71218a7f 100644 --- a/library/stdarch/crates/core_arch/src/acle/registers/aarch32.rs +++ b/library/stdarch/crates/core_arch/src/acle/registers/aarch32.rs @@ -3,7 +3,7 @@ pub struct APSR; // Note (@Lokathor): Because this breaks the use of Rust on the Game Boy // Advance, this change must be reverted until Rust learns to handle cpu state -// properly. See also: https://github.com/rust-lang-nursery/stdsimd/issues/702 +// properly. See also: https://github.com/rust-lang/stdarch/issues/702 //#[cfg(any(not(target_feature = "thumb-state"), target_feature = "v6t2"))] //rsr!(APSR); diff --git a/library/stdarch/crates/core_arch/src/acle/simd32.rs b/library/stdarch/crates/core_arch/src/acle/simd32.rs index fe5a818fd09..678324f1065 100644 --- a/library/stdarch/crates/core_arch/src/acle/simd32.rs +++ b/library/stdarch/crates/core_arch/src/acle/simd32.rs @@ -63,7 +63,7 @@ //! - \[x\] __smusdx #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; use crate::{core_arch::acle::dsp::int16x2_t, mem::transmute}; @@ -465,7 +465,7 @@ pub unsafe fn __usada8(a: int8x4_t, b: int8x4_t, c: u32) -> u32 { mod tests { use crate::core_arch::simd::{i16x2, i8x4, u8x4}; use std::{i16, i8, mem::transmute}; - use stdsimd_test::simd_test; + use stdarch_test::simd_test; #[test] fn qadd8() { diff --git a/library/stdarch/crates/core_arch/src/arm/armclang.rs b/library/stdarch/crates/core_arch/src/arm/armclang.rs index 36a3a2fe9a3..2e0a82ade3e 100644 --- a/library/stdarch/crates/core_arch/src/arm/armclang.rs +++ b/library/stdarch/crates/core_arch/src/arm/armclang.rs @@ -7,7 +7,7 @@ //! [arm_comp_ref]: https://developer.arm.com/docs/100067/0610 #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; /// Inserts a breakpoint instruction. 
/// diff --git a/library/stdarch/crates/core_arch/src/arm/mod.rs b/library/stdarch/crates/core_arch/src/arm/mod.rs index 6829a8523c4..a467c2ce854 100644 --- a/library/stdarch/crates/core_arch/src/arm/mod.rs +++ b/library/stdarch/crates/core_arch/src/arm/mod.rs @@ -37,7 +37,7 @@ pub use self::neon::*; pub use crate::core_arch::acle::*; #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; /// Generates the trap instruction `UDF` #[cfg(target_arch = "arm")] diff --git a/library/stdarch/crates/core_arch/src/arm/neon.rs b/library/stdarch/crates/core_arch/src/arm/neon.rs index 99097b229de..73391c84ebe 100644 --- a/library/stdarch/crates/core_arch/src/arm/neon.rs +++ b/library/stdarch/crates/core_arch/src/arm/neon.rs @@ -2,7 +2,7 @@ use crate::{core_arch::simd_llvm::*, mem::transmute}; #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; types! { /// ARM-specific 64-bit wide vector of eight packed `i8`. @@ -978,7 +978,7 @@ pub unsafe fn vtbx4_p8(a: poly8x8_t, b: poly8x8x4_t, c: uint8x8_t) -> poly8x8_t mod tests { use crate::core_arch::{arm::*, simd::*}; use std::{i16, i32, i8, mem::transmute, u16, u32, u8}; - use stdsimd_test::simd_test; + use stdarch_test::simd_test; #[simd_test(enable = "neon")] unsafe fn test_vadd_s8() { diff --git a/library/stdarch/crates/core_arch/src/arm/table_lookup_tests.rs b/library/stdarch/crates/core_arch/src/arm/table_lookup_tests.rs index 5b8b4878f6c..15aa2f2695d 100644 --- a/library/stdarch/crates/core_arch/src/arm/table_lookup_tests.rs +++ b/library/stdarch/crates/core_arch/src/arm/table_lookup_tests.rs @@ -12,7 +12,7 @@ use crate::core_arch::arm::*; use crate::core_arch::simd::*; use std::mem; -use stdsimd_test::simd_test; +use stdarch_test::simd_test; macro_rules! test_vtbl { ($test_name:ident => $fn_id:ident: diff --git a/library/stdarch/crates/core_arch/src/arm/v6.rs b/library/stdarch/crates/core_arch/src/arm/v6.rs index c7ec7d411df..5df30cd623b 100644 --- a/library/stdarch/crates/core_arch/src/arm/v6.rs +++ b/library/stdarch/crates/core_arch/src/arm/v6.rs @@ -7,7 +7,7 @@ //! html #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; /// Reverse the order of the bytes. #[inline] diff --git a/library/stdarch/crates/core_arch/src/arm/v7.rs b/library/stdarch/crates/core_arch/src/arm/v7.rs index 9ee4a461827..e7507f9b953 100644 --- a/library/stdarch/crates/core_arch/src/arm/v7.rs +++ b/library/stdarch/crates/core_arch/src/arm/v7.rs @@ -10,13 +10,13 @@ pub use super::v6::*; #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; /// Count Leading Zeros. 
#[inline] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(clz))] -// FIXME: https://github.com/rust-lang-nursery/stdsimd/issues/382 +// FIXME: https://github.com/rust-lang/stdarch/issues/382 // #[cfg_attr(all(test, target_arch = "arm"), assert_instr(clz))] pub unsafe fn _clz_u8(x: u8) -> u8 { x.leading_zeros() as u8 @@ -26,7 +26,7 @@ pub unsafe fn _clz_u8(x: u8) -> u8 { #[inline] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(clz))] -// FIXME: https://github.com/rust-lang-nursery/stdsimd/issues/382 +// FIXME: https://github.com/rust-lang/stdarch/issues/382 // #[cfg_attr(all(test, target_arch = "arm"), assert_instr(clz))] pub unsafe fn _clz_u16(x: u16) -> u16 { x.leading_zeros() as u16 @@ -36,7 +36,7 @@ pub unsafe fn _clz_u16(x: u16) -> u16 { #[inline] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(clz))] -// FIXME: https://github.com/rust-lang-nursery/stdsimd/issues/382 +// FIXME: https://github.com/rust-lang/stdarch/issues/382 // #[cfg_attr(all(test, target_arch = "arm"), assert_instr(clz))] pub unsafe fn _clz_u32(x: u32) -> u32 { x.leading_zeros() as u32 diff --git a/library/stdarch/crates/core_arch/src/lib.rs b/library/stdarch/crates/core_arch/src/lib.rs index 424a185f88e..3be5859671c 100644 --- a/library/stdarch/crates/core_arch/src/lib.rs +++ b/library/stdarch/crates/core_arch/src/lib.rs @@ -1,5 +1,4 @@ #![doc(include = "core_arch_docs.md")] -#![cfg_attr(stdsimd_strict, deny(warnings))] #![allow(dead_code)] #![allow(unused_features)] #![feature( @@ -65,7 +64,7 @@ extern crate std; #[macro_use] extern crate std_detect; #[cfg(test)] -extern crate stdsimd_test; +extern crate stdarch_test; #[cfg(all(test, target_arch = "wasm32"))] extern crate wasm_bindgen_test; diff --git a/library/stdarch/crates/core_arch/src/mips/mod.rs b/library/stdarch/crates/core_arch/src/mips/mod.rs index d7ab34f5c3f..efde97f4df7 100644 --- a/library/stdarch/crates/core_arch/src/mips/mod.rs +++ b/library/stdarch/crates/core_arch/src/mips/mod.rs @@ -4,7 +4,7 @@ mod msa; pub use self::msa::*; #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; /// Generates the trap instruction `BREAK` #[cfg_attr(test, assert_instr(break))] diff --git a/library/stdarch/crates/core_arch/src/mips/msa.rs b/library/stdarch/crates/core_arch/src/mips/msa.rs index 18e1e78d983..46c9bd089af 100644 --- a/library/stdarch/crates/core_arch/src/mips/msa.rs +++ b/library/stdarch/crates/core_arch/src/mips/msa.rs @@ -6,7 +6,7 @@ //! 
[msa_ref]: http://cdn2.imgtec.com/documentation/MD00866-2B-MSA32-AFP-01.12.pdf #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; use crate::mem; @@ -9235,7 +9235,7 @@ mod tests { mem, }; use std::{f32, f64}; - use stdsimd_test::simd_test; + use stdarch_test::simd_test; #[simd_test(enable = "msa")] unsafe fn test_msa_add_a_b() { diff --git a/library/stdarch/crates/core_arch/src/mod.rs b/library/stdarch/crates/core_arch/src/mod.rs index 940f6398614..88603024b95 100644 --- a/library/stdarch/crates/core_arch/src/mod.rs +++ b/library/stdarch/crates/core_arch/src/mod.rs @@ -10,7 +10,7 @@ mod simd; #[cfg_attr( bootstrap, - doc(include = "../stdsimd/crates/core_arch/src/core_arch_docs.md") + doc(include = "../stdarch/crates/core_arch/src/core_arch_docs.md") )] #[cfg_attr(not(bootstrap), doc(include = "core_arch_docs.md"))] #[stable(feature = "simd_arch", since = "1.27.0")] diff --git a/library/stdarch/crates/core_arch/src/powerpc/altivec.rs b/library/stdarch/crates/core_arch/src/powerpc/altivec.rs index 2bcec9374b8..2f56ecf8f3d 100644 --- a/library/stdarch/crates/core_arch/src/powerpc/altivec.rs +++ b/library/stdarch/crates/core_arch/src/powerpc/altivec.rs @@ -19,7 +19,7 @@ use crate::{ }; #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; types! { /// PowerPC-specific 128-bit wide vector of sixteen packed `i8` @@ -1714,7 +1714,7 @@ mod tests { use std::mem::transmute; use crate::core_arch::simd::*; - use stdsimd_test::simd_test; + use stdarch_test::simd_test; macro_rules! test_vec_2 { { $name: ident, $fn:ident, $ty: ident, [$($a:expr),+], [$($b:expr),+], [$($d:expr),+] } => { diff --git a/library/stdarch/crates/core_arch/src/powerpc/mod.rs b/library/stdarch/crates/core_arch/src/powerpc/mod.rs index 4bfee70e1fc..9765d11d1f4 100644 --- a/library/stdarch/crates/core_arch/src/powerpc/mod.rs +++ b/library/stdarch/crates/core_arch/src/powerpc/mod.rs @@ -9,7 +9,7 @@ mod vsx; pub use self::vsx::*; #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; /// Generates the trap instruction `TRAP` #[cfg_attr(test, assert_instr(trap))] diff --git a/library/stdarch/crates/core_arch/src/powerpc/vsx.rs b/library/stdarch/crates/core_arch/src/powerpc/vsx.rs index 77b7306b027..394c9a7043b 100644 --- a/library/stdarch/crates/core_arch/src/powerpc/vsx.rs +++ b/library/stdarch/crates/core_arch/src/powerpc/vsx.rs @@ -11,7 +11,7 @@ use crate::core_arch::simd_llvm::*; #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; use crate::mem; @@ -93,7 +93,7 @@ mod tests { use super::mem; use crate::core_arch::simd::*; - use stdsimd_test::simd_test; + use stdarch_test::simd_test; macro_rules! 
test_vec_xxpermdi { {$name:ident, $shorttype:ident, $longtype:ident, [$($a:expr),+], [$($b:expr),+], [$($c:expr),+], [$($d:expr),+]} => { diff --git a/library/stdarch/crates/core_arch/src/wasm32/atomic.rs b/library/stdarch/crates/core_arch/src/wasm32/atomic.rs index 4ebbaa19bd2..b8ffaeac0e2 100644 --- a/library/stdarch/crates/core_arch/src/wasm32/atomic.rs +++ b/library/stdarch/crates/core_arch/src/wasm32/atomic.rs @@ -9,7 +9,7 @@ #![cfg(any(target_feature = "atomics", dox))] #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; #[cfg(test)] use wasm_bindgen_test::wasm_bindgen_test; diff --git a/library/stdarch/crates/core_arch/src/wasm32/memory.rs b/library/stdarch/crates/core_arch/src/wasm32/memory.rs index 0ccc104c63b..f2c7fa54c3d 100644 --- a/library/stdarch/crates/core_arch/src/wasm32/memory.rs +++ b/library/stdarch/crates/core_arch/src/wasm32/memory.rs @@ -1,5 +1,5 @@ #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; #[cfg(test)] use wasm_bindgen_test::wasm_bindgen_test; diff --git a/library/stdarch/crates/core_arch/src/wasm32/mod.rs b/library/stdarch/crates/core_arch/src/wasm32/mod.rs index d95eb0890da..5e7a9d85f4e 100644 --- a/library/stdarch/crates/core_arch/src/wasm32/mod.rs +++ b/library/stdarch/crates/core_arch/src/wasm32/mod.rs @@ -1,7 +1,7 @@ //! WASM32 intrinsics #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; #[cfg(test)] use wasm_bindgen_test::wasm_bindgen_test; diff --git a/library/stdarch/crates/core_arch/src/wasm32/simd128.rs b/library/stdarch/crates/core_arch/src/wasm32/simd128.rs index 7bf579f38e3..99d02a0a441 100644 --- a/library/stdarch/crates/core_arch/src/wasm32/simd128.rs +++ b/library/stdarch/crates/core_arch/src/wasm32/simd128.rs @@ -13,7 +13,7 @@ use crate::{ }; #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; #[cfg(test)] use wasm_bindgen_test::wasm_bindgen_test; diff --git a/library/stdarch/crates/core_arch/src/x86/abm.rs b/library/stdarch/crates/core_arch/src/x86/abm.rs index 9ee69f46e66..50912f77423 100644 --- a/library/stdarch/crates/core_arch/src/x86/abm.rs +++ b/library/stdarch/crates/core_arch/src/x86/abm.rs @@ -18,7 +18,7 @@ //! https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#ABM_.28Advanced_Bit_Manipulation.29 #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; /// Counts the leading most significant zero bits. 
/// @@ -46,7 +46,7 @@ pub unsafe fn _popcnt32(x: i32) -> i32 { #[cfg(test)] mod tests { - use stdsimd_test::simd_test; + use stdarch_test::simd_test; use crate::core_arch::x86::*; diff --git a/library/stdarch/crates/core_arch/src/x86/adx.rs b/library/stdarch/crates/core_arch/src/x86/adx.rs index 3bcb4d8ba25..6df321c049b 100644 --- a/library/stdarch/crates/core_arch/src/x86/adx.rs +++ b/library/stdarch/crates/core_arch/src/x86/adx.rs @@ -1,5 +1,5 @@ #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; #[allow(improper_ctypes)] extern "unadjusted" { @@ -48,7 +48,7 @@ pub unsafe fn _subborrow_u32(c_in: u8, a: u32, b: u32, out: &mut u32) -> u8 { #[cfg(test)] mod tests { - use stdsimd_test::simd_test; + use stdarch_test::simd_test; use crate::core_arch::x86::*; diff --git a/library/stdarch/crates/core_arch/src/x86/aes.rs b/library/stdarch/crates/core_arch/src/x86/aes.rs index bc45fc39da5..603744aef6e 100644 --- a/library/stdarch/crates/core_arch/src/x86/aes.rs +++ b/library/stdarch/crates/core_arch/src/x86/aes.rs @@ -10,7 +10,7 @@ use crate::core_arch::x86::__m128i; #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; #[allow(improper_ctypes)] extern "C" { @@ -111,7 +111,7 @@ mod tests { // __m128i happens to be defined in terms of signed integers. #![allow(overflowing_literals)] - use stdsimd_test::simd_test; + use stdarch_test::simd_test; use crate::core_arch::x86::*; diff --git a/library/stdarch/crates/core_arch/src/x86/avx.rs b/library/stdarch/crates/core_arch/src/x86/avx.rs index 180b6e72387..56067ba5c93 100644 --- a/library/stdarch/crates/core_arch/src/x86/avx.rs +++ b/library/stdarch/crates/core_arch/src/x86/avx.rs @@ -21,7 +21,7 @@ use crate::{ }; #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; /// Adds packed double-precision (64-bit) floating-point elements /// in `a` and `b`. @@ -54,7 +54,7 @@ pub unsafe fn _mm256_add_ps(a: __m256, b: __m256) -> __m256 { #[inline] #[target_feature(enable = "avx")] // FIXME: Should be 'vandpd' instuction. -// See https://github.com/rust-lang-nursery/stdsimd/issues/71 +// See https://github.com/rust-lang/stdarch/issues/71 #[cfg_attr(test, assert_instr(vandps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_and_pd(a: __m256d, b: __m256d) -> __m256d { @@ -84,7 +84,7 @@ pub unsafe fn _mm256_and_ps(a: __m256, b: __m256) -> __m256 { #[inline] #[target_feature(enable = "avx")] // FIXME: should be `vorpd` instuction. -// See . +// See . #[cfg_attr(test, assert_instr(vorps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_or_pd(a: __m256d, b: __m256d) -> __m256d { @@ -3309,7 +3309,7 @@ extern "C" { #[cfg(test)] mod tests { use crate::hint::black_box; - use stdsimd_test::simd_test; + use stdarch_test::simd_test; use crate::core_arch::x86::*; diff --git a/library/stdarch/crates/core_arch/src/x86/avx2.rs b/library/stdarch/crates/core_arch/src/x86/avx2.rs index fcc1fdfd087..84f3364b922 100644 --- a/library/stdarch/crates/core_arch/src/x86/avx2.rs +++ b/library/stdarch/crates/core_arch/src/x86/avx2.rs @@ -24,7 +24,7 @@ use crate::{ }; #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; /// Computes the absolute values of packed 32-bit integers in `a`. 
/// @@ -4057,7 +4057,7 @@ extern "C" { #[cfg(test)] mod tests { use std; - use stdsimd_test::simd_test; + use stdarch_test::simd_test; use crate::core_arch::x86::*; diff --git a/library/stdarch/crates/core_arch/src/x86/avx512f.rs b/library/stdarch/crates/core_arch/src/x86/avx512f.rs index ad1a5a40704..f38583102b9 100644 --- a/library/stdarch/crates/core_arch/src/x86/avx512f.rs +++ b/library/stdarch/crates/core_arch/src/x86/avx512f.rs @@ -4,7 +4,7 @@ use crate::{ }; #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; /// Computes the absolute values of packed 32-bit integers in `a`. /// @@ -97,7 +97,7 @@ pub unsafe fn _mm512_set1_epi64(a: i64) -> __m512i { #[cfg(test)] mod tests { use std; - use stdsimd_test::simd_test; + use stdarch_test::simd_test; use crate::core_arch::x86::*; diff --git a/library/stdarch/crates/core_arch/src/x86/avx512ifma.rs b/library/stdarch/crates/core_arch/src/x86/avx512ifma.rs index ad3a21a796c..425d0ff7e5e 100644 --- a/library/stdarch/crates/core_arch/src/x86/avx512ifma.rs +++ b/library/stdarch/crates/core_arch/src/x86/avx512ifma.rs @@ -1,7 +1,7 @@ use crate::core_arch::x86::*; #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; /// Multiply packed unsigned 52-bit integers in each 64-bit element of /// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit @@ -106,7 +106,7 @@ extern "C" { #[cfg(test)] mod tests { use std; - use stdsimd_test::simd_test; + use stdarch_test::simd_test; use crate::core_arch::x86::*; diff --git a/library/stdarch/crates/core_arch/src/x86/bmi1.rs b/library/stdarch/crates/core_arch/src/x86/bmi1.rs index f9df86b50a3..0f769f33b0f 100644 --- a/library/stdarch/crates/core_arch/src/x86/bmi1.rs +++ b/library/stdarch/crates/core_arch/src/x86/bmi1.rs @@ -10,7 +10,7 @@ //! [wikipedia_bmi]: https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#ABM_.28Advanced_Bit_Manipulation.29 #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; /// Extracts bits in range [`start`, `start` + `length`) from `a` into /// the least significant bits of the result. @@ -118,7 +118,7 @@ extern "C" { #[cfg(test)] mod tests { - use stdsimd_test::simd_test; + use stdarch_test::simd_test; use crate::core_arch::x86::*; diff --git a/library/stdarch/crates/core_arch/src/x86/bmi2.rs b/library/stdarch/crates/core_arch/src/x86/bmi2.rs index b709d1187d3..b08b8733c2f 100644 --- a/library/stdarch/crates/core_arch/src/x86/bmi2.rs +++ b/library/stdarch/crates/core_arch/src/x86/bmi2.rs @@ -11,7 +11,7 @@ //! https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#ABM_.28Advanced_Bit_Manipulation.29 #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; /// Unsigned multiply without affecting flags. 
/// @@ -77,7 +77,7 @@ extern "C" { #[cfg(test)] mod tests { - use stdsimd_test::simd_test; + use stdarch_test::simd_test; use crate::core_arch::x86::*; diff --git a/library/stdarch/crates/core_arch/src/x86/bswap.rs b/library/stdarch/crates/core_arch/src/x86/bswap.rs index 2896781f847..20e3aa6fc35 100644 --- a/library/stdarch/crates/core_arch/src/x86/bswap.rs +++ b/library/stdarch/crates/core_arch/src/x86/bswap.rs @@ -2,7 +2,7 @@ #![allow(clippy::module_name_repetitions)] #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; /// Returns an integer with the reversed byte order of x /// diff --git a/library/stdarch/crates/core_arch/src/x86/bt.rs b/library/stdarch/crates/core_arch/src/x86/bt.rs index 8ecc87fe4cb..6e42828dd7c 100644 --- a/library/stdarch/crates/core_arch/src/x86/bt.rs +++ b/library/stdarch/crates/core_arch/src/x86/bt.rs @@ -1,5 +1,5 @@ #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; /// Returns the bit in position `b` of the memory addressed by `p`. #[inline] diff --git a/library/stdarch/crates/core_arch/src/x86/cpuid.rs b/library/stdarch/crates/core_arch/src/x86/cpuid.rs index c52157599c6..32a13b532e5 100644 --- a/library/stdarch/crates/core_arch/src/x86/cpuid.rs +++ b/library/stdarch/crates/core_arch/src/x86/cpuid.rs @@ -2,7 +2,7 @@ #![allow(clippy::module_name_repetitions)] #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; /// Result of the `cpuid` instruction. #[allow(clippy::missing_inline_in_public_items)] diff --git a/library/stdarch/crates/core_arch/src/x86/f16c.rs b/library/stdarch/crates/core_arch/src/x86/f16c.rs index 195485914b3..503bd41d2fd 100644 --- a/library/stdarch/crates/core_arch/src/x86/f16c.rs +++ b/library/stdarch/crates/core_arch/src/x86/f16c.rs @@ -9,7 +9,7 @@ use crate::{ }; #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; #[allow(improper_ctypes)] extern "unadjusted" { @@ -110,7 +110,7 @@ pub unsafe fn _mm256_cvtps_ph(a: __m256, imm_rounding: i32) -> __m128i { #[cfg(test)] mod tests { use crate::{core_arch::x86::*, mem::transmute}; - use stdsimd_test::simd_test; + use stdarch_test::simd_test; #[simd_test(enable = "f16c")] unsafe fn test_mm_cvtph_ps() { diff --git a/library/stdarch/crates/core_arch/src/x86/fma.rs b/library/stdarch/crates/core_arch/src/x86/fma.rs index de4e589fe14..48abe9f49a9 100644 --- a/library/stdarch/crates/core_arch/src/x86/fma.rs +++ b/library/stdarch/crates/core_arch/src/x86/fma.rs @@ -21,7 +21,7 @@ use crate::core_arch::x86::*; #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; /// Multiplies packed double-precision (64-bit) floating-point elements in `a` /// and `b`, and add the intermediate result to packed elements in `c`. @@ -508,7 +508,7 @@ extern "C" { #[cfg(test)] mod tests { use std; - use stdsimd_test::simd_test; + use stdarch_test::simd_test; use crate::core_arch::x86::*; diff --git a/library/stdarch/crates/core_arch/src/x86/fxsr.rs b/library/stdarch/crates/core_arch/src/x86/fxsr.rs index eeca495e0b4..83d53f4773d 100644 --- a/library/stdarch/crates/core_arch/src/x86/fxsr.rs +++ b/library/stdarch/crates/core_arch/src/x86/fxsr.rs @@ -1,7 +1,7 @@ //! FXSR floating-point context fast save and restor. 
#[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; #[allow(improper_ctypes)] extern "C" { @@ -59,7 +59,7 @@ pub unsafe fn _fxrstor(mem_addr: *const u8) { mod tests { use crate::core_arch::x86::*; use std::{cmp::PartialEq, fmt}; - use stdsimd_test::simd_test; + use stdarch_test::simd_test; #[repr(align(16))] struct FxsaveArea { diff --git a/library/stdarch/crates/core_arch/src/x86/mmx.rs b/library/stdarch/crates/core_arch/src/x86/mmx.rs index 96a3c2d881d..ff4f8277e25 100644 --- a/library/stdarch/crates/core_arch/src/x86/mmx.rs +++ b/library/stdarch/crates/core_arch/src/x86/mmx.rs @@ -14,7 +14,7 @@ use crate::{ }; #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; /// Constructs a 64-bit integer vector initialized to zero. #[inline] @@ -532,7 +532,7 @@ extern "C" { #[cfg(test)] mod tests { use crate::core_arch::x86::*; - use stdsimd_test::simd_test; + use stdarch_test::simd_test; #[simd_test(enable = "mmx")] unsafe fn test_mm_setzero_si64() { diff --git a/library/stdarch/crates/core_arch/src/x86/mod.rs b/library/stdarch/crates/core_arch/src/x86/mod.rs index bed9e4a020e..68965ad93de 100644 --- a/library/stdarch/crates/core_arch/src/x86/mod.rs +++ b/library/stdarch/crates/core_arch/src/x86/mod.rs @@ -519,14 +519,14 @@ pub use self::bmi1::*; mod bmi2; pub use self::bmi2::*; -#[cfg(not(stdsimd_intel_sde))] +#[cfg(not(stdarch_intel_sde))] mod sse4a; -#[cfg(not(stdsimd_intel_sde))] +#[cfg(not(stdarch_intel_sde))] pub use self::sse4a::*; -#[cfg(not(stdsimd_intel_sde))] +#[cfg(not(stdarch_intel_sde))] mod tbm; -#[cfg(not(stdsimd_intel_sde))] +#[cfg(not(stdarch_intel_sde))] pub use self::tbm::*; mod mmx; @@ -548,7 +548,7 @@ mod adx; pub use self::adx::*; #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; /// Generates the trap instruction `UD2` #[cfg_attr(test, assert_instr(ud2))] diff --git a/library/stdarch/crates/core_arch/src/x86/pclmulqdq.rs b/library/stdarch/crates/core_arch/src/x86/pclmulqdq.rs index 9b9d21c9add..0e1bebae9ee 100644 --- a/library/stdarch/crates/core_arch/src/x86/pclmulqdq.rs +++ b/library/stdarch/crates/core_arch/src/x86/pclmulqdq.rs @@ -8,7 +8,7 @@ use crate::core_arch::x86::__m128i; #[cfg(test)] -use crate::stdsimd_test::assert_instr; +use crate::stdarch_test::assert_instr; #[allow(improper_ctypes)] extern "C" { @@ -48,7 +48,7 @@ mod tests { // __m128i happens to be defined in terms of signed integers. #![allow(overflowing_literals)] - use stdsimd_test::simd_test; + use stdarch_test::simd_test; use crate::core_arch::x86::*; diff --git a/library/stdarch/crates/core_arch/src/x86/rdrand.rs b/library/stdarch/crates/core_arch/src/x86/rdrand.rs index a1252933522..c6bab914892 100644 --- a/library/stdarch/crates/core_arch/src/x86/rdrand.rs +++ b/library/stdarch/crates/core_arch/src/x86/rdrand.rs @@ -16,7 +16,7 @@ extern "unadjusted" { } #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; /// Read a hardware generated 16-bit random value and store the result in val. /// Returns 1 if a random value was generated, and 0 otherwise. diff --git a/library/stdarch/crates/core_arch/src/x86/rdtsc.rs b/library/stdarch/crates/core_arch/src/x86/rdtsc.rs index a92ff4b3ece..67f6e48fa7b 100644 --- a/library/stdarch/crates/core_arch/src/x86/rdtsc.rs +++ b/library/stdarch/crates/core_arch/src/x86/rdtsc.rs @@ -1,7 +1,7 @@ //! RDTSC instructions. #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; /// Reads the current value of the processor’s time-stamp counter. 
/// @@ -60,7 +60,7 @@ extern "C" { #[cfg(test)] mod tests { use crate::core_arch::x86::*; - use stdsimd_test::simd_test; + use stdarch_test::simd_test; #[simd_test(enable = "sse2")] unsafe fn _rdtsc() { diff --git a/library/stdarch/crates/core_arch/src/x86/rtm.rs b/library/stdarch/crates/core_arch/src/x86/rtm.rs index ebe3ed80da7..1e532b4efe7 100644 --- a/library/stdarch/crates/core_arch/src/x86/rtm.rs +++ b/library/stdarch/crates/core_arch/src/x86/rtm.rs @@ -14,7 +14,7 @@ //! [intel_consid]: https://software.intel.com/en-us/cpp-compiler-developer-guide-and-reference-intel-transactional-synchronization-extensions-intel-tsx-programming-considerations #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; extern "C" { #[link_name = "llvm.x86.xbegin"] @@ -107,7 +107,7 @@ pub const fn _xabort_code(status: u32) -> u32 { #[cfg(test)] mod tests { - use stdsimd_test::simd_test; + use stdarch_test::simd_test; use crate::core_arch::x86::*; diff --git a/library/stdarch/crates/core_arch/src/x86/sha.rs b/library/stdarch/crates/core_arch/src/x86/sha.rs index c6cbc5324f5..362a97ccd36 100644 --- a/library/stdarch/crates/core_arch/src/x86/sha.rs +++ b/library/stdarch/crates/core_arch/src/x86/sha.rs @@ -22,7 +22,7 @@ extern "C" { } #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; /// Performs an intermediate calculation for the next four SHA1 message values /// (unsigned 32-bit integers) using previous message values from `a` and `b`, @@ -141,7 +141,7 @@ mod tests { core_arch::{simd::*, x86::*}, hint::black_box, }; - use stdsimd_test::simd_test; + use stdarch_test::simd_test; #[simd_test(enable = "sha")] #[allow(overflowing_literals)] diff --git a/library/stdarch/crates/core_arch/src/x86/sse.rs b/library/stdarch/crates/core_arch/src/x86/sse.rs index f03773d1ea0..3160ac57b80 100644 --- a/library/stdarch/crates/core_arch/src/x86/sse.rs +++ b/library/stdarch/crates/core_arch/src/x86/sse.rs @@ -6,7 +6,7 @@ use crate::{ }; #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; /// Adds the first component of `a` and `b`, the other components are copied /// from `a`. @@ -978,7 +978,7 @@ pub unsafe fn _mm_setzero_ps() -> __m128 { /// permute intrinsics. #[inline] #[allow(non_snake_case)] -#[unstable(feature = "stdsimd", issue = "27731")] +#[unstable(feature = "stdarch", issue = "27731")] pub const fn _MM_SHUFFLE(z: u32, y: u32, x: u32, w: u32) -> i32 { ((z << 6) | (y << 4) | (x << 2) | w) as i32 } @@ -2499,7 +2499,7 @@ pub unsafe fn _mm_cvtps_pi8(a: __m128) -> __m64 { mod tests { use crate::{hint::black_box, mem::transmute}; use std::{boxed, f32::NAN}; - use stdsimd_test::simd_test; + use stdarch_test::simd_test; use crate::core_arch::{simd::*, x86::*}; diff --git a/library/stdarch/crates/core_arch/src/x86/sse2.rs b/library/stdarch/crates/core_arch/src/x86/sse2.rs index 2c621b21fc7..21a3b4a0816 100644 --- a/library/stdarch/crates/core_arch/src/x86/sse2.rs +++ b/library/stdarch/crates/core_arch/src/x86/sse2.rs @@ -1,7 +1,7 @@ //! 
Streaming SIMD Extensions 2 (SSE2) #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; use crate::{ core_arch::{simd::*, simd_llvm::*, x86::*}, @@ -3193,7 +3193,7 @@ mod tests { i32, mem::{self, transmute}, }; - use stdsimd_test::simd_test; + use stdarch_test::simd_test; #[test] fn test_mm_pause() { diff --git a/library/stdarch/crates/core_arch/src/x86/sse3.rs b/library/stdarch/crates/core_arch/src/x86/sse3.rs index 9e335e12c8d..977de1dc17a 100644 --- a/library/stdarch/crates/core_arch/src/x86/sse3.rs +++ b/library/stdarch/crates/core_arch/src/x86/sse3.rs @@ -10,7 +10,7 @@ use crate::{ }; #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; /// Alternatively add and subtract packed single-precision (32-bit) /// floating-point elements in `a` to/from packed elements in `b`. @@ -165,7 +165,7 @@ extern "C" { #[cfg(test)] mod tests { - use stdsimd_test::simd_test; + use stdarch_test::simd_test; use crate::core_arch::x86::*; diff --git a/library/stdarch/crates/core_arch/src/x86/sse41.rs b/library/stdarch/crates/core_arch/src/x86/sse41.rs index 2e0a0acb461..f8616296718 100644 --- a/library/stdarch/crates/core_arch/src/x86/sse41.rs +++ b/library/stdarch/crates/core_arch/src/x86/sse41.rs @@ -6,7 +6,7 @@ use crate::{ }; #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; // SSE4 rounding constans /// round to nearest @@ -1194,7 +1194,7 @@ extern "C" { mod tests { use crate::core_arch::x86::*; use std::mem; - use stdsimd_test::simd_test; + use stdarch_test::simd_test; #[simd_test(enable = "sse4.1")] unsafe fn test_mm_blendv_epi8() { diff --git a/library/stdarch/crates/core_arch/src/x86/sse42.rs b/library/stdarch/crates/core_arch/src/x86/sse42.rs index eca71233ad5..d4d8aa644e4 100644 --- a/library/stdarch/crates/core_arch/src/x86/sse42.rs +++ b/library/stdarch/crates/core_arch/src/x86/sse42.rs @@ -3,7 +3,7 @@ //! Extends SSE4.1 with STTNI (String and Text New Instructions). #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; use crate::{ core_arch::{simd::*, simd_llvm::*, x86::*}, @@ -707,7 +707,7 @@ extern "C" { #[cfg(test)] mod tests { - use stdsimd_test::simd_test; + use stdarch_test::simd_test; use crate::core_arch::x86::*; use std::ptr; diff --git a/library/stdarch/crates/core_arch/src/x86/sse4a.rs b/library/stdarch/crates/core_arch/src/x86/sse4a.rs index 51401e783f7..e6345d0da90 100644 --- a/library/stdarch/crates/core_arch/src/x86/sse4a.rs +++ b/library/stdarch/crates/core_arch/src/x86/sse4a.rs @@ -6,7 +6,7 @@ use crate::{ }; #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; #[allow(improper_ctypes)] extern "C" { @@ -84,7 +84,7 @@ pub unsafe fn _mm_stream_ss(p: *mut f32, a: __m128) { #[cfg(test)] mod tests { use crate::core_arch::x86::*; - use stdsimd_test::simd_test; + use stdarch_test::simd_test; #[simd_test(enable = "sse4a")] unsafe fn test_mm_extract_si64() { diff --git a/library/stdarch/crates/core_arch/src/x86/ssse3.rs b/library/stdarch/crates/core_arch/src/x86/ssse3.rs index 07eb620534c..6a45603e441 100644 --- a/library/stdarch/crates/core_arch/src/x86/ssse3.rs +++ b/library/stdarch/crates/core_arch/src/x86/ssse3.rs @@ -6,7 +6,7 @@ use crate::{ }; #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; /// Computes the absolute value of packed 8-bit signed integers in `a` and /// return the unsigned results. 
@@ -560,7 +560,7 @@ extern "C" { #[cfg(test)] mod tests { - use stdsimd_test::simd_test; + use stdarch_test::simd_test; use crate::core_arch::x86::*; diff --git a/library/stdarch/crates/core_arch/src/x86/tbm.rs b/library/stdarch/crates/core_arch/src/x86/tbm.rs index dd8f800c68a..d1102a11693 100644 --- a/library/stdarch/crates/core_arch/src/x86/tbm.rs +++ b/library/stdarch/crates/core_arch/src/x86/tbm.rs @@ -11,7 +11,7 @@ //! https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#ABM_.28Advanced_Bit_Manipulation.29 #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; // FIXME(blocked on #248) // TODO: LLVM-CODEGEN ERROR: LLVM ERROR: Cannot select: @@ -279,7 +279,7 @@ pub unsafe fn _tzmsk_u64(x: u64) -> u64 { #[cfg(test)] mod tests { - use stdsimd_test::simd_test; + use stdarch_test::simd_test; use crate::core_arch::x86::*; diff --git a/library/stdarch/crates/core_arch/src/x86/xsave.rs b/library/stdarch/crates/core_arch/src/x86/xsave.rs index 124cb39b78d..f3814dd7f62 100644 --- a/library/stdarch/crates/core_arch/src/x86/xsave.rs +++ b/library/stdarch/crates/core_arch/src/x86/xsave.rs @@ -2,7 +2,7 @@ #![allow(clippy::module_name_repetitions)] #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; #[allow(improper_ctypes)] extern "C" { @@ -165,7 +165,7 @@ mod tests { use std::{fmt, prelude::v1::*}; use crate::core_arch::x86::*; - use stdsimd_test::simd_test; + use stdarch_test::simd_test; #[repr(align(64))] struct XsaveArea { @@ -209,7 +209,7 @@ mod tests { } } - // FIXME: https://github.com/rust-lang-nursery/stdsimd/issues/209 + // FIXME: https://github.com/rust-lang/stdarch/issues/209 /* #[simd_test(enable = "xsave")] unsafe fn xsave() { @@ -237,7 +237,7 @@ mod tests { assert_eq!(xcr, xcr_cpy); } - // FIXME: https://github.com/rust-lang-nursery/stdsimd/issues/209 + // FIXME: https://github.com/rust-lang/stdarch/issues/209 /* #[simd_test(enable = "xsave,xsaveopt")] unsafe fn xsaveopt() { @@ -253,7 +253,7 @@ mod tests { */ // FIXME: this looks like a bug in Intel's SDE: - #[cfg(not(stdsimd_intel_sde))] + #[cfg(not(stdarch_intel_sde))] #[simd_test(enable = "xsave,xsavec")] unsafe fn xsavec() { let m = 0xFFFFFFFFFFFFFFFF_u64; //< all registers @@ -266,7 +266,7 @@ mod tests { assert_eq!(a, b); } - // FIXME: https://github.com/rust-lang-nursery/stdsimd/issues/209 + // FIXME: https://github.com/rust-lang/stdarch/issues/209 /* #[simd_test(enable = "xsave,xsaves")] unsafe fn xsaves() { diff --git a/library/stdarch/crates/core_arch/src/x86_64/abm.rs b/library/stdarch/crates/core_arch/src/x86_64/abm.rs index b5a2b267a6e..988074d673d 100644 --- a/library/stdarch/crates/core_arch/src/x86_64/abm.rs +++ b/library/stdarch/crates/core_arch/src/x86_64/abm.rs @@ -18,7 +18,7 @@ //! https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#ABM_.28Advanced_Bit_Manipulation.29 #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; /// Counts the leading most significant zero bits. 
/// @@ -46,7 +46,7 @@ pub unsafe fn _popcnt64(x: i64) -> i32 { #[cfg(test)] mod tests { - use stdsimd_test::simd_test; + use stdarch_test::simd_test; use crate::core_arch::arch::x86_64::*; diff --git a/library/stdarch/crates/core_arch/src/x86_64/adx.rs b/library/stdarch/crates/core_arch/src/x86_64/adx.rs index d1d295d7728..57efe75ddd7 100644 --- a/library/stdarch/crates/core_arch/src/x86_64/adx.rs +++ b/library/stdarch/crates/core_arch/src/x86_64/adx.rs @@ -1,5 +1,5 @@ #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; #[allow(improper_ctypes)] extern "unadjusted" { @@ -48,7 +48,7 @@ pub unsafe fn _subborrow_u64(c_in: u8, a: u64, b: u64, out: &mut u64) -> u8 { #[cfg(test)] mod tests { - use stdsimd_test::simd_test; + use stdarch_test::simd_test; use crate::core_arch::x86_64::*; diff --git a/library/stdarch/crates/core_arch/src/x86_64/avx.rs b/library/stdarch/crates/core_arch/src/x86_64/avx.rs index 082913e9353..5215fd6fbf3 100644 --- a/library/stdarch/crates/core_arch/src/x86_64/avx.rs +++ b/library/stdarch/crates/core_arch/src/x86_64/avx.rs @@ -33,7 +33,7 @@ pub unsafe fn _mm256_insert_epi64(a: __m256i, i: i64, index: i32) -> __m256i { #[cfg(test)] mod tests { - use stdsimd_test::simd_test; + use stdarch_test::simd_test; use crate::core_arch::x86::*; diff --git a/library/stdarch/crates/core_arch/src/x86_64/avx2.rs b/library/stdarch/crates/core_arch/src/x86_64/avx2.rs index f31c018f547..7cc3fb6efec 100644 --- a/library/stdarch/crates/core_arch/src/x86_64/avx2.rs +++ b/library/stdarch/crates/core_arch/src/x86_64/avx2.rs @@ -36,7 +36,7 @@ pub unsafe fn _mm256_extract_epi64(a: __m256i, imm8: i32) -> i64 { #[cfg(test)] mod tests { use crate::core_arch::arch::x86_64::*; - use stdsimd_test::simd_test; + use stdarch_test::simd_test; #[simd_test(enable = "avx2")] unsafe fn test_mm256_extract_epi64() { diff --git a/library/stdarch/crates/core_arch/src/x86_64/bmi.rs b/library/stdarch/crates/core_arch/src/x86_64/bmi.rs index 6e1296d5fed..9f71a8d3885 100644 --- a/library/stdarch/crates/core_arch/src/x86_64/bmi.rs +++ b/library/stdarch/crates/core_arch/src/x86_64/bmi.rs @@ -10,7 +10,7 @@ //! [wikipedia_bmi]: https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#ABM_.28Advanced_Bit_Manipulation.29 #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; /// Extracts bits in range [`start`, `start` + `length`) from `a` into /// the least significant bits of the result. @@ -123,7 +123,7 @@ extern "C" { #[cfg(test)] mod tests { - use stdsimd_test::simd_test; + use stdarch_test::simd_test; use crate::core_arch::{x86::*, x86_64::*}; diff --git a/library/stdarch/crates/core_arch/src/x86_64/bmi2.rs b/library/stdarch/crates/core_arch/src/x86_64/bmi2.rs index 99b7fa0256e..356d95a3d13 100644 --- a/library/stdarch/crates/core_arch/src/x86_64/bmi2.rs +++ b/library/stdarch/crates/core_arch/src/x86_64/bmi2.rs @@ -11,7 +11,7 @@ //! https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#ABM_.28Advanced_Bit_Manipulation.29 #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; /// Unsigned multiply without affecting flags. 
/// @@ -79,7 +79,7 @@ extern "C" { #[cfg(test)] mod tests { - use stdsimd_test::simd_test; + use stdarch_test::simd_test; use crate::core_arch::x86_64::*; diff --git a/library/stdarch/crates/core_arch/src/x86_64/bswap.rs b/library/stdarch/crates/core_arch/src/x86_64/bswap.rs index 08bf1d2f87b..9e8e76d4f70 100644 --- a/library/stdarch/crates/core_arch/src/x86_64/bswap.rs +++ b/library/stdarch/crates/core_arch/src/x86_64/bswap.rs @@ -3,7 +3,7 @@ #![allow(clippy::module_name_repetitions)] #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; /// Returns an integer with the reversed byte order of x /// diff --git a/library/stdarch/crates/core_arch/src/x86_64/bt.rs b/library/stdarch/crates/core_arch/src/x86_64/bt.rs index c6de6b28d5c..9c6dcf7b61b 100644 --- a/library/stdarch/crates/core_arch/src/x86_64/bt.rs +++ b/library/stdarch/crates/core_arch/src/x86_64/bt.rs @@ -1,5 +1,5 @@ #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; /// Returns the bit in position `b` of the memory addressed by `p`. #[inline] diff --git a/library/stdarch/crates/core_arch/src/x86_64/cmpxchg16b.rs b/library/stdarch/crates/core_arch/src/x86_64/cmpxchg16b.rs index 3c4d4f6421e..391daed20ee 100644 --- a/library/stdarch/crates/core_arch/src/x86_64/cmpxchg16b.rs +++ b/library/stdarch/crates/core_arch/src/x86_64/cmpxchg16b.rs @@ -1,7 +1,7 @@ use crate::sync::atomic::Ordering; #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; /// Compares and exchange 16 bytes (128 bits) of data atomically. /// diff --git a/library/stdarch/crates/core_arch/src/x86_64/fxsr.rs b/library/stdarch/crates/core_arch/src/x86_64/fxsr.rs index 543ed6dcee0..0b26fb6d04f 100644 --- a/library/stdarch/crates/core_arch/src/x86_64/fxsr.rs +++ b/library/stdarch/crates/core_arch/src/x86_64/fxsr.rs @@ -1,7 +1,7 @@ //! FXSR floating-point context fast save and restor. #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; #[allow(improper_ctypes)] extern "C" { @@ -59,7 +59,7 @@ pub unsafe fn _fxrstor64(mem_addr: *const u8) { mod tests { use crate::core_arch::x86_64::*; use std::{cmp::PartialEq, fmt}; - use stdsimd_test::simd_test; + use stdarch_test::simd_test; #[repr(align(16))] struct FxsaveArea { diff --git a/library/stdarch/crates/core_arch/src/x86_64/rdrand.rs b/library/stdarch/crates/core_arch/src/x86_64/rdrand.rs index 7b6d0de01f8..e5ec933fb93 100644 --- a/library/stdarch/crates/core_arch/src/x86_64/rdrand.rs +++ b/library/stdarch/crates/core_arch/src/x86_64/rdrand.rs @@ -13,7 +13,7 @@ extern "unadjusted" { } #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; /// Read a hardware generated 64-bit random value and store the result in val. /// Returns 1 if a random value was generated, and 0 otherwise. 
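All of the `x86_64` hunks above (and below) follow the same pattern as the 32-bit modules: only the test-only import switches from `stdsimd_test` to `stdarch_test`, while the `#[cfg_attr(test, assert_instr(..))]` usage sites stay untouched. A minimal sketch of what an intrinsic module looks like after the rename — the function and instruction names here are illustrative, modeled on the `arm/v7.rs` hunk earlier in this patch, not a new addition of this commit:

    // Pulled in only when building the crate's own tests.
    #[cfg(test)]
    use stdarch_test::assert_instr;

    /// Illustrative wrapper: the attribute below makes the test harness
    /// disassemble the generated test shim and assert that it contains
    /// the named instruction.
    #[inline]
    #[cfg_attr(test, assert_instr(clz))]
    pub unsafe fn example_clz_u32(x: u32) -> u32 {
        x.leading_zeros()
    }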
diff --git a/library/stdarch/crates/core_arch/src/x86_64/sse.rs b/library/stdarch/crates/core_arch/src/x86_64/sse.rs index 066d22fa664..a93215072ab 100644 --- a/library/stdarch/crates/core_arch/src/x86_64/sse.rs +++ b/library/stdarch/crates/core_arch/src/x86_64/sse.rs @@ -3,7 +3,7 @@ use crate::core_arch::x86::*; #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; #[allow(improper_ctypes)] extern "C" { @@ -72,7 +72,7 @@ pub unsafe fn _mm_cvtsi64_ss(a: __m128, b: i64) -> __m128 { mod tests { use crate::core_arch::arch::x86_64::*; use std::{f32::NAN, i64::MIN}; - use stdsimd_test::simd_test; + use stdarch_test::simd_test; #[simd_test(enable = "sse")] unsafe fn test_mm_cvtss_si64() { diff --git a/library/stdarch/crates/core_arch/src/x86_64/sse2.rs b/library/stdarch/crates/core_arch/src/x86_64/sse2.rs index 37bfc45cf82..94a919d4fe7 100644 --- a/library/stdarch/crates/core_arch/src/x86_64/sse2.rs +++ b/library/stdarch/crates/core_arch/src/x86_64/sse2.rs @@ -6,7 +6,7 @@ use crate::{ }; #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; #[allow(improper_ctypes)] extern "C" { @@ -149,7 +149,7 @@ pub unsafe fn _mm_cvtsi64x_sd(a: __m128d, b: i64) -> __m128d { mod tests { use crate::core_arch::arch::x86_64::*; use std::{boxed, f64, i64}; - use stdsimd_test::simd_test; + use stdarch_test::simd_test; #[simd_test(enable = "sse2")] unsafe fn test_mm_cvtsd_si64() { diff --git a/library/stdarch/crates/core_arch/src/x86_64/sse41.rs b/library/stdarch/crates/core_arch/src/x86_64/sse41.rs index 36e498888ab..18c315c90fe 100644 --- a/library/stdarch/crates/core_arch/src/x86_64/sse41.rs +++ b/library/stdarch/crates/core_arch/src/x86_64/sse41.rs @@ -6,7 +6,7 @@ use crate::{ }; #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; /// Extracts an 64-bit integer from `a` selected with `imm8` /// @@ -37,7 +37,7 @@ pub unsafe fn _mm_insert_epi64(a: __m128i, i: i64, imm8: i32) -> __m128i { #[cfg(test)] mod tests { use crate::core_arch::arch::x86_64::*; - use stdsimd_test::simd_test; + use stdarch_test::simd_test; #[simd_test(enable = "sse4.1")] unsafe fn test_mm_extract_epi64() { diff --git a/library/stdarch/crates/core_arch/src/x86_64/sse42.rs b/library/stdarch/crates/core_arch/src/x86_64/sse42.rs index 03db7aca384..405073261ce 100644 --- a/library/stdarch/crates/core_arch/src/x86_64/sse42.rs +++ b/library/stdarch/crates/core_arch/src/x86_64/sse42.rs @@ -1,7 +1,7 @@ //! 
`x86_64`'s Streaming SIMD Extensions 4.2 (SSE4.2) #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; #[allow(improper_ctypes)] extern "C" { @@ -25,7 +25,7 @@ pub unsafe fn _mm_crc32_u64(crc: u64, v: u64) -> u64 { mod tests { use crate::core_arch::arch::x86_64::*; - use stdsimd_test::simd_test; + use stdarch_test::simd_test; #[simd_test(enable = "sse4.2")] unsafe fn test_mm_crc32_u64() { diff --git a/library/stdarch/crates/core_arch/src/x86_64/xsave.rs b/library/stdarch/crates/core_arch/src/x86_64/xsave.rs index 964a24d12e2..8296695058b 100644 --- a/library/stdarch/crates/core_arch/src/x86_64/xsave.rs +++ b/library/stdarch/crates/core_arch/src/x86_64/xsave.rs @@ -3,7 +3,7 @@ #![allow(clippy::module_name_repetitions)] #[cfg(test)] -use stdsimd_test::assert_instr; +use stdarch_test::assert_instr; #[allow(improper_ctypes)] extern "C" { @@ -124,16 +124,16 @@ pub unsafe fn _xrstors64(mem_addr: *const u8, rs_mask: u64) { xrstors64(mem_addr, (rs_mask >> 32) as u32, rs_mask as u32); } -// FIXME: https://github.com/rust-lang-nursery/stdsimd/issues/209 +// FIXME: https://github.com/rust-lang/stdarch/issues/209 // All these tests fail with Intel SDE. /* #[cfg(test)] mod tests { use crate::core_arch::x86::x86_64::xsave; - use stdsimd_test::simd_test; + use stdarch_test::simd_test; use std::fmt; - // FIXME: https://github.com/rust-lang-nursery/stdsimd/issues/209 + // FIXME: https://github.com/rust-lang/stdarch/issues/209 #[repr(align(64))] struct XsaveArea { // max size for 256-bit registers is 800 bytes: diff --git a/library/stdarch/crates/core_arch/tests/cpu-detection.rs b/library/stdarch/crates/core_arch/tests/cpu-detection.rs index 321f24e9fc2..07b56c616d6 100644 --- a/library/stdarch/crates/core_arch/tests/cpu-detection.rs +++ b/library/stdarch/crates/core_arch/tests/cpu-detection.rs @@ -1,5 +1,4 @@ #![feature(stdsimd)] -#![cfg_attr(stdsimd_strict, deny(warnings))] #![allow(clippy::option_unwrap_used, clippy::print_stdout, clippy::use_debug)] #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] diff --git a/library/stdarch/crates/simd-test-macro/src/lib.rs b/library/stdarch/crates/simd-test-macro/src/lib.rs index 67f3059ebee..4d1170cc7e9 100644 --- a/library/stdarch/crates/simd-test-macro/src/lib.rs +++ b/library/stdarch/crates/simd-test-macro/src/lib.rs @@ -126,7 +126,7 @@ pub fn simd_test( #emms return v; } else { - ::stdsimd_test::assert_skip_test_ok(stringify!(#name)); + ::stdarch_test::assert_skip_test_ok(stringify!(#name)); } #[target_feature(enable = #enable_feature)] diff --git a/library/stdarch/crates/std_detect/Cargo.toml b/library/stdarch/crates/std_detect/Cargo.toml index ae633566591..8902868c623 100644 --- a/library/stdarch/crates/std_detect/Cargo.toml +++ b/library/stdarch/crates/std_detect/Cargo.toml @@ -8,8 +8,8 @@ authors = [ ] description = "`std::detect` - Rust's standard library run-time CPU feature detection." 
documentation = "https://docs.rs/std_detect" -homepage = "https://github.com/rust-lang-nursery/stdsimd" -repository = "https://github.com/rust-lang-nursery/stdsimd" +homepage = "https://github.com/rust-lang/stdarch" +repository = "https://github.com/rust-lang/stdarch" readme = "README.md" keywords = ["std", "run-time", "feature", "detection"] categories = ["hardware-support"] @@ -17,10 +17,10 @@ license = "MIT/Apache-2.0" edition = "2015" [badges] -travis-ci = { repository = "rust-lang-nursery/stdsimd" } -appveyor = { repository = "rust-lang-nursery/stdsimd" } -is-it-maintained-issue-resolution = { repository = "rust-lang-nursery/stdsimd" } -is-it-maintained-open-issues = { repository = "rust-lang-nursery/stdsimd" } +travis-ci = { repository = "rust-lang/stdarch" } +appveyor = { repository = "rust-lang/stdarch" } +is-it-maintained-issue-resolution = { repository = "rust-lang/stdarch" } +is-it-maintained-open-issues = { repository = "rust-lang/stdarch" } maintenance = { status = "experimental" } [dependencies] diff --git a/library/stdarch/crates/std_detect/README.md b/library/stdarch/crates/std_detect/README.md index 4d2ec7d3448..1c83a54b80d 100644 --- a/library/stdarch/crates/std_detect/README.md +++ b/library/stdarch/crates/std_detect/README.md @@ -75,9 +75,9 @@ Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in `std_detect` by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions. -[travis]: https://travis-ci.com/rust-lang-nursery/stdsimd -[Travis-CI Status]: https://travis-ci.com/rust-lang-nursery/stdsimd.svg?branch=master -[appveyor]: https://ci.appveyor.com/project/rust-lang-libs/stdsimd/branch/master +[travis]: https://travis-ci.com/rust-lang/stdarch +[Travis-CI Status]: https://travis-ci.com/rust-lang/stdarch.svg?branch=master +[appveyor]: https://ci.appveyor.com/project/rust-lang-libs/stdarch/branch/master [Appveyor Status]: https://ci.appveyor.com/api/projects/status/ix74qhmilpibn00x/branch/master?svg=true [std_detect_crate_badge]: https://img.shields.io/crates/v/std_detect.svg [std_detect_crate_link]: https://crates.io/crates/std_detect diff --git a/library/stdarch/crates/std_detect/src/lib.rs b/library/stdarch/crates/std_detect/src/lib.rs index 8b3bb304f1c..7737719c3b0 100644 --- a/library/stdarch/crates/std_detect/src/lib.rs +++ b/library/stdarch/crates/std_detect/src/lib.rs @@ -17,7 +17,6 @@ #![deny(clippy::missing_inline_in_public_items)] #![cfg_attr(target_os = "linux", feature(linkage))] #![cfg_attr(all(target_os = "freebsd", target_arch = "aarch64"), feature(asm))] -#![cfg_attr(stdsimd_strict, deny(warnings))] #![cfg_attr(test, allow(unused_imports))] #![no_std] diff --git a/library/stdarch/crates/std_detect/tests/cpu-detection.rs b/library/stdarch/crates/std_detect/tests/cpu-detection.rs index 0aae39e2947..d0c9901c4cd 100644 --- a/library/stdarch/crates/std_detect/tests/cpu-detection.rs +++ b/library/stdarch/crates/std_detect/tests/cpu-detection.rs @@ -1,5 +1,4 @@ #![feature(stdsimd)] -#![cfg_attr(stdsimd_strict, deny(warnings))] #![allow(clippy::option_unwrap_used, clippy::use_debug, clippy::print_stdout)] #[cfg(any( diff --git a/library/stdarch/crates/std_detect/tests/macro_trailing_commas.rs b/library/stdarch/crates/std_detect/tests/macro_trailing_commas.rs index df03e6555aa..e950523d0e5 100644 --- a/library/stdarch/crates/std_detect/tests/macro_trailing_commas.rs +++ b/library/stdarch/crates/std_detect/tests/macro_trailing_commas.rs @@ -1,5 +1,4 @@ 
#![feature(stdsimd)] -#![cfg_attr(stdsimd_strict, deny(warnings))] #![allow(clippy::option_unwrap_used, clippy::use_debug, clippy::print_stdout)] #[cfg(any( diff --git a/library/stdarch/crates/stdarch-test/Cargo.toml b/library/stdarch/crates/stdarch-test/Cargo.toml new file mode 100644 index 00000000000..2b445f8dc5b --- /dev/null +++ b/library/stdarch/crates/stdarch-test/Cargo.toml @@ -0,0 +1,20 @@ +[package] +name = "stdarch-test" +version = "0.1.0" +authors = ["Alex Crichton "] + +[dependencies] +assert-instr-macro = { path = "../assert-instr-macro" } +simd-test-macro = { path = "../simd-test-macro" } +cc = "1.0" +lazy_static = "1.0" +rustc-demangle = "0.1.8" +cfg-if = "0.1" + +[target.wasm32-unknown-unknown.dependencies] +wasm-bindgen = "0.2.47" +js-sys = "0.3" +console_error_panic_hook = "0.1" + +[features] +default = [] diff --git a/library/stdarch/crates/stdarch-test/src/disassembly.rs b/library/stdarch/crates/stdarch-test/src/disassembly.rs new file mode 100644 index 00000000000..23ebd92e740 --- /dev/null +++ b/library/stdarch/crates/stdarch-test/src/disassembly.rs @@ -0,0 +1,182 @@ +//! Disassembly calling function for most targets. + +use std::{env, collections::HashSet, process::Command, str}; +use crate::Function; + +// Extracts the "shim" name from the `symbol`. +fn normalize(mut symbol: &str) -> String { + // Remove trailing colon: + if symbol.ends_with(':') { + symbol = &symbol[..symbol.len() - 1]; + } + if symbol.ends_with('>') { + symbol = &symbol[..symbol.len() - 1]; + } + if let Some(idx) = symbol.find('<') { + symbol = &symbol[idx + 1..]; + } + + let mut symbol = rustc_demangle::demangle(symbol).to_string(); + symbol = match symbol.rfind("::h") { + Some(i) => symbol[..i].to_string(), + None => symbol.to_string(), + }; + + // Remove Rust paths + if let Some(last_colon) = symbol.rfind(':') { + symbol = (&symbol[last_colon + 1..]).to_string(); + } + + // Normalize to no leading underscore to handle platforms that may + // inject extra ones in symbol names. + while symbol.starts_with('_') { + symbol.remove(0); + } + symbol +} + +pub(crate) fn disassemble_myself() -> HashSet { + let me = env::current_exe().expect("failed to get current exe"); + + let disassembly = if cfg!(target_arch = "x86_64") + && cfg!(target_os = "windows") + && cfg!(target_env = "msvc") + { + let mut cmd = cc::windows_registry::find( + "x86_64-pc-windows-msvc", + "dumpbin.exe", + ).expect("failed to find `dumpbin` tool"); + let output = cmd + .arg("/DISASM") + .arg(&me) + .output() + .expect("failed to execute dumpbin"); + println!( + "{}\n{}", + output.status, + String::from_utf8_lossy(&output.stderr) + ); + assert!(output.status.success()); + String::from_utf8(output.stdout) + } else if cfg!(target_os = "windows") { + panic!("disassembly unimplemented") + } else if cfg!(target_os = "macos") { + let output = Command::new("otool") + .arg("-vt") + .arg(&me) + .output() + .expect("failed to execute otool"); + println!( + "{}\n{}", + output.status, + String::from_utf8_lossy(&output.stderr) + ); + assert!(output.status.success()); + + String::from_utf8(output.stdout) + } else { + let objdump = + env::var("OBJDUMP").unwrap_or_else(|_| "objdump".to_string()); + let output = Command::new(objdump.clone()) + .arg("--disassemble") + .arg(&me) + .output() + .unwrap_or_else(|_| panic!( + "failed to execute objdump. 
OBJDUMP={}", + objdump + )); + println!( + "{}\n{}", + output.status, + String::from_utf8_lossy(&output.stderr) + ); + assert!(output.status.success()); + + String::from_utf8(output.stdout) + }.expect("failed to convert to utf8"); + + parse(&disassembly) +} + +fn parse(output: &str) -> HashSet { + let mut lines = output.lines(); + + for line in output.lines().take(100) { + println!("{}", line); + } + + let mut functions = HashSet::new(); + let mut cached_header = None; + while let Some(header) = cached_header.take().or_else(|| lines.next()) { + if !header.ends_with(':') || !header.contains("stdarch_test_shim") { + continue + } + let symbol = normalize(header); + let mut instructions = Vec::new(); + while let Some(instruction) = lines.next() { + if instruction.ends_with(':') { + cached_header = Some(instruction); + break; + } + if instruction.is_empty() { + cached_header = None; + break; + } + let parts = if cfg!(target_os = "macos") { + // Each line of instructions should look like: + // + // $addr $instruction... + instruction + .split_whitespace() + .skip(1) + .map(std::string::ToString::to_string) + .collect::>() + } else if cfg!(target_env = "msvc") { + // Each line looks like: + // + // > $addr: ab cd ef $instr.. + // > 00 12 # this line os optional + if instruction.starts_with(" ") { + continue; + } + instruction + .split_whitespace() + .skip(1) + .skip_while(|s| { + s.len() == 2 && usize::from_str_radix(s, 16).is_ok() + }).map(std::string::ToString::to_string) + .skip_while(|s| *s == "lock") // skip x86-specific prefix + .collect::>() + } else { + // objdump + // Each line of instructions should look like: + // + // $rel_offset: ab cd ef 00 $instruction... + let expected_len + = if cfg!(target_arch = "arm") || cfg!(target_arch = "aarch64") { + 8 + } else { + 2 + }; + + instruction + .split_whitespace() + .skip(1) + .skip_while(|s| { + s.len() == expected_len + && usize::from_str_radix(s, 16).is_ok() + }) + .skip_while(|s| *s == "lock") // skip x86-specific prefix + .map(std::string::ToString::to_string) + .collect::>() + }; + instructions.push(parts.join(" ")); + } + let function = Function { + name: symbol, + instrs: instructions + }; + assert!(functions.insert(function)); + } + functions +} diff --git a/library/stdarch/crates/stdarch-test/src/lib.rs b/library/stdarch/crates/stdarch-test/src/lib.rs new file mode 100644 index 00000000000..a284697d6bc --- /dev/null +++ b/library/stdarch/crates/stdarch-test/src/lib.rs @@ -0,0 +1,178 @@ +//! Runtime support needed for testing the stdarch crate. +//! +//! This basically just disassembles the current executable and then parses the +//! output once globally and then provides the `assert` function which makes +//! assertions about the disassembly of a function. +#![feature(const_str_as_bytes)] +#![feature(const_transmute)] +#![allow(clippy::missing_docs_in_private_items, clippy::print_stdout)] + +extern crate assert_instr_macro; +extern crate cc; +#[macro_use] +extern crate lazy_static; +extern crate rustc_demangle; +extern crate simd_test_macro; +#[macro_use] +extern crate cfg_if; + +pub use assert_instr_macro::*; +pub use simd_test_macro::*; +use std::{cmp, collections::HashSet, env, hash, str, sync::atomic::AtomicPtr}; + +// `println!` doesn't work on wasm32 right now, so shadow the compiler's `println!` +// macro with our own shim that redirects to `console.log`. +#[allow(unused)] +#[cfg(target_arch = "wasm32")] +#[macro_export] +macro_rules! println { + ($($args:tt)*) => (crate::wasm::js_console_log(&format!($($args)*))) +} + +cfg_if! 
{ + if #[cfg(target_arch = "wasm32")] { + extern crate wasm_bindgen; + extern crate console_error_panic_hook; + pub mod wasm; + use wasm::disassemble_myself; + } else { + mod disassembly; + use disassembly::disassemble_myself; + } +} + +lazy_static! { + static ref DISASSEMBLY: HashSet = disassemble_myself(); +} + +#[derive(Debug)] +struct Function { + name: String, + instrs: Vec, +} +impl Function { + fn new(n: &str) -> Self { + Self { + name: n.to_string(), + instrs: Vec::new(), + } + } +} + +impl cmp::PartialEq for Function { + fn eq(&self, other: &Self) -> bool { + self.name == other.name + } +} +impl cmp::Eq for Function {} + +impl hash::Hash for Function { + fn hash(&self, state: &mut H) { + self.name.hash(state) + } +} + +/// Main entry point for this crate, called by the `#[assert_instr]` macro. +/// +/// This asserts that the function at `fnptr` contains the instruction +/// `expected` provided. +pub fn assert(_fnptr: usize, fnname: &str, expected: &str) { + //eprintln!("shim name: {}", fnname); + let function = &DISASSEMBLY + .get(&Function::new(fnname)) + .unwrap_or_else(|| panic!("function \"{}\" not found in the disassembly", fnname)); + //eprintln!(" function: {:?}", function); + + let mut instrs = &function.instrs[..]; + while instrs.last().map_or(false, |s| s == "nop") { + instrs = &instrs[..instrs.len() - 1]; + } + + // Look for `expected` as the first part of any instruction in this + // function, e.g., tzcntl in tzcntl %rax,%rax. + let found = instrs.iter().any(|s| s.contains(expected)); + + // Look for `call` instructions in the disassembly to detect whether + // inlining failed: all intrinsics are `#[inline(always)]`, so + // calling one intrinsic from another should not generate `call` + // instructions. + let inlining_failed = instrs.windows(2).any(|s| { + // On 32-bit x86 position independent code will call itself and be + // immediately followed by a `pop` to learn about the current address. + // Let's not take that into account when considering whether a function + // failed inlining something. + s[0].contains("call") && (!cfg!(target_arch = "x86") || s[1].contains("pop")) + }); + + let instruction_limit = std::env::var("STDARCH_ASSERT_INSTR_LIMIT") + .ok() + .map_or_else( + || match expected { + // `cpuid` returns a pretty big aggregate structure, so exempt + // it from the slightly more restrictive 22 instructions below. + "cpuid" => 30, + + // Apparently, on Windows, LLVM generates a bunch of + // saves/restores of xmm registers around these intstructions, + // which exceeds the limit of 20 below. As it seems dictated by + // Windows's ABI (I believe?), we probably can't do much + // about it. + "vzeroall" | "vzeroupper" if cfg!(windows) => 30, + + // Intrinsics using `cvtpi2ps` are typically "composites" and + // in some cases exceed the limit. + "cvtpi2ps" => 25, + + // core_arch/src/acle/simd32 + "usad8" => 27, + "qadd8" | "qsub8" | "sadd8" | "sel" | "shadd8" | "shsub8" | "usub8" | "ssub8" => 29, + + // Original limit was 20 instructions, but ARM DSP Intrinsics + // are exactly 20 instructions long. So, bump the limit to 22 + // instead of adding here a long list of exceptions. + _ => 22, + }, + |v| v.parse().unwrap(), + ); + let probably_only_one_instruction = instrs.len() < instruction_limit; + + if found && probably_only_one_instruction && !inlining_failed { + return; + } + + // Help debug by printing out the found disassembly, and then panic as we + // didn't find the instruction. 
+ println!("disassembly for {}: ", fnname,); + for (i, instr) in instrs.iter().enumerate() { + println!("\t{:2}: {}", i, instr); + } + + if !found { + panic!( + "failed to find instruction `{}` in the disassembly", + expected + ); + } else if !probably_only_one_instruction { + panic!( + "instruction found, but the disassembly contains too many \ + instructions: #instructions = {} >= {} (limit)", + instrs.len(), + instruction_limit + ); + } else if inlining_failed { + panic!( + "instruction found, but the disassembly contains `call` \ + instructions, which hint that inlining failed" + ); + } +} + +pub fn assert_skip_test_ok(name: &str) { + if env::var("STDARCH_TEST_EVERYTHING").is_err() { + return; + } + panic!("skipped test `{}` when it shouldn't be skipped", name); +} + +// See comment in `assert-instr-macro` crate for why this exists +pub static _DONT_DEDUP: AtomicPtr = AtomicPtr::new(b"".as_ptr() as *mut _); diff --git a/library/stdarch/crates/stdarch-test/src/wasm.rs b/library/stdarch/crates/stdarch-test/src/wasm.rs new file mode 100644 index 00000000000..b31051bc68f --- /dev/null +++ b/library/stdarch/crates/stdarch-test/src/wasm.rs @@ -0,0 +1,88 @@ +//! Disassembly calling function for `wasm32` targets. +use wasm_bindgen::prelude::*; + +use crate::Function; +use std::collections::HashSet; + +#[wasm_bindgen(module = "child_process")] +extern "C" { + #[wasm_bindgen(js_name = execFileSync)] + fn exec_file_sync(cmd: &str, args: &js_sys::Array, opts: &js_sys::Object) -> Buffer; +} + +#[wasm_bindgen(module = "buffer")] +extern "C" { + type Buffer; + #[wasm_bindgen(method, js_name = toString)] + fn to_string(this: &Buffer) -> String; +} + +#[wasm_bindgen] +extern "C" { + #[wasm_bindgen(js_namespace = require)] + fn resolve(module: &str) -> String; + #[wasm_bindgen(js_namespace = console, js_name = log)] + pub fn js_console_log(s: &str); +} + +pub(crate) fn disassemble_myself() -> HashSet { + use std::path::Path; + ::console_error_panic_hook::set_once(); + // Our wasm module in the wasm-bindgen test harness is called + // "wasm-bindgen-test_bg". When running in node this is actually a shim JS + // file. Ask node where that JS file is, and then we use that with a wasm + // extension to find the wasm file itself. + let js_shim = resolve("wasm-bindgen-test_bg"); + let js_shim = Path::new(&js_shim).with_extension("wasm"); + + // Execute `wasm2wat` synchronously, waiting for and capturing all of its + // output. Note that we pass in a custom `maxBuffer` parameter because we're + // generating a ton of output that needs to be buffered. + let args = js_sys::Array::new(); + args.push(&js_shim.display().to_string().into()); + args.push(&"--enable-simd".into()); + let opts = js_sys::Object::new(); + js_sys::Reflect::set(&opts, &"maxBuffer".into(), &(200 * 1024 * 1024).into()) + .unwrap(); + let output = exec_file_sync("wasm2wat", &args, &opts).to_string(); + + let mut ret: HashSet = HashSet::new(); + let mut lines = output.lines().map(|s| s.trim()); + while let Some(line) = lines.next() { + // If this isn't a function, we don't care about it. + if !line.starts_with("(func ") { + continue; + } + + let mut function = Function { + name: String::new(), + instrs: Vec::new(), + }; + + // Empty functions will end in `))` so there's nothing to do, otherwise + // we'll have a bunch of following lines which are instructions. + // + // Lines that have an imbalanced `)` mark the end of a function. 
+ if !line.ends_with("))") { + while let Some(line) = lines.next() { + function.instrs.push(line.to_string()); + if !line.starts_with("(") && line.ends_with(")") { + break; + } + } + } + // The second element here split on whitespace should be the name of + // the function, skipping the type/params/results + function.name = line.split_whitespace().nth(1).unwrap().to_string(); + if function.name.starts_with("$") { + function.name = function.name[1..].to_string() + } + + if !function.name.contains("stdarch_test_shim") { + continue; + } + + assert!(ret.insert(function)); + } + return ret; +} diff --git a/library/stdarch/crates/stdarch-verify/.gitattributes b/library/stdarch/crates/stdarch-verify/.gitattributes new file mode 100644 index 00000000000..621fdea6f7d --- /dev/null +++ b/library/stdarch/crates/stdarch-verify/.gitattributes @@ -0,0 +1 @@ +*.xml binary diff --git a/library/stdarch/crates/stdarch-verify/Cargo.toml b/library/stdarch/crates/stdarch-verify/Cargo.toml new file mode 100644 index 00000000000..1414b110070 --- /dev/null +++ b/library/stdarch/crates/stdarch-verify/Cargo.toml @@ -0,0 +1,19 @@ +[package] +name = "stdarch-verify" +version = "0.1.0" +authors = ["Alex Crichton "] +edition = "2018" + +[dependencies] +proc-macro2 = "0.4" +quote = "0.6" +syn = { version = "0.15", features = ["full"] } + +[lib] +proc-macro = true +test = false + +[dev-dependencies] +serde = { version = "1.0", features = ['derive'] } +serde-xml-rs = "0.3" +html5ever = "0.23.0" diff --git a/library/stdarch/crates/stdarch-verify/arm-intrinsics.html b/library/stdarch/crates/stdarch-verify/arm-intrinsics.html new file mode 100644 index 00000000000..ac246c6bae2 --- /dev/null +++ b/library/stdarch/crates/stdarch-verify/arm-intrinsics.html @@ -0,0 +1,93399 @@ + + + + + + + + Technologies | NEON Intrinsics Reference – Arm Developer + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
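The crates added above fit together as follows: `stdarch-verify` parses this NEON reference page (presumably what the `html5ever` dev-dependency is for), while `stdarch-test`'s `assert` checks that each intrinsic's `stdarch_test_shim_*` wrapper disassembles to the expected instruction. A rough sketch of how an intrinsic definition in `core_arch` consumes that machinery — the body and the `simd_add` helper are illustrative assumptions, not taken from this patch:

// Hypothetical excerpt from core_arch, only to show how stdarch-test is used;
// the real crate's attributes and helpers may differ.
#[cfg(test)]
use stdarch_test::assert_instr;

#[inline]
#[target_feature(enable = "neon")]
// `assert_instr` generates a `stdarch_test_shim_*` function whose disassembly
// must contain an `add` instruction; `parse` and `assert` above verify that.
#[cfg_attr(test, assert_instr(add))]
pub unsafe fn vadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
    simd_add(a, b) // assumed internal helper, shown for illustration only
}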
NEON Intrinsics

Click on the intrinsic name to display more information about the intrinsic. To search for an intrinsic, enter the name of the intrinsic in the search box. As you type, the matching intrinsics will be displayed.

Description

Add (vector). This instruction adds corresponding elements in the two source SIMD&FP registers, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

ADD Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then
+        Elem[result, e, esize] = element1 - element2;
+    else
+        Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
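The `Operation` pseudocode above — repeated with different element sizes throughout this reference — walks `elements` lanes of `esize` bits and stores each lane-wise sum modulo 2^esize. A plain-Rust model of the `Vd.8B` form (eight 8-bit lanes, ignoring the shared `sub_op` path), included here only as a reading aid:

// Scalar model of ADD Vd.8B,Vn.8B,Vm.8B: eight lanes, esize = 8 bits.
fn add_v8b(operand1: [i8; 8], operand2: [i8; 8]) -> [i8; 8] {
    let mut result = [0i8; 8];
    for e in 0..8 {
        // Elem[result, e, esize] = element1 + element2, wrapping at 2^esize
        result[e] = operand1[e].wrapping_add(operand2[e]);
    }
    result
}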

Description

Add (vector). This instruction adds corresponding elements in the two source SIMD&FP registers, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

ADD Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then
+        Elem[result, e, esize] = element1 - element2;
+    else
+        Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Add (vector). This instruction adds corresponding elements in the two source SIMD&FP registers, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

ADD Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then
+        Elem[result, e, esize] = element1 - element2;
+    else
+        Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Add (vector). This instruction adds corresponding elements in the two source SIMD&FP registers, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

ADD Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then
+        Elem[result, e, esize] = element1 - element2;
+    else
+        Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Add (vector). This instruction adds corresponding elements in the two source SIMD&FP registers, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

ADD Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then
+        Elem[result, e, esize] = element1 - element2;
+    else
+        Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Add (vector). This instruction adds corresponding elements in the two source SIMD&FP registers, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

ADD Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then
+        Elem[result, e, esize] = element1 - element2;
+    else
+        Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Add (vector). This instruction adds corresponding elements in the two source SIMD&FP registers, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

ADD Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then
+        Elem[result, e, esize] = element1 - element2;
+    else
+        Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Add (vector). This instruction adds corresponding elements in the two source SIMD&FP registers, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

ADD Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then
+        Elem[result, e, esize] = element1 - element2;
+    else
+        Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Add (vector). This instruction adds corresponding elements in the two source SIMD&FP registers, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

ADD Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then
+        Elem[result, e, esize] = element1 - element2;
+    else
+        Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Add (vector). This instruction adds corresponding elements in the two source SIMD&FP registers, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

ADD Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then
+        Elem[result, e, esize] = element1 - element2;
+    else
+        Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Add (vector). This instruction adds corresponding elements in the two source SIMD&FP registers, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

ADD Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then
+        Elem[result, e, esize] = element1 - element2;
+    else
+        Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Add (vector). This instruction adds corresponding elements in the two source SIMD&FP registers, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

ADD Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then
+        Elem[result, e, esize] = element1 - element2;
+    else
+        Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Add (vector). This instruction adds corresponding elements in the two source SIMD&FP registers, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

ADD Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then
+        Elem[result, e, esize] = element1 - element2;
+    else
+        Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Add (vector). This instruction adds corresponding elements in the two source SIMD&FP registers, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

ADD Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then
+        Elem[result, e, esize] = element1 - element2;
+    else
+        Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Add (vector). This instruction adds corresponding elements in the two source SIMD&FP registers, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

ADD Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then
+        Elem[result, e, esize] = element1 - element2;
+    else
+        Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Add (vector). This instruction adds corresponding elements in the two source SIMD&FP registers, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

ADD Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then
+        Elem[result, e, esize] = element1 - element2;
+    else
+        Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Floating-point Add (vector). This instruction adds corresponding vector elements in the two source SIMD&FP registers, writes the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.

+

A64 Instruction

FADD Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+    Elem[result, e, esize] = FPAdd(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
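Unlike the integer `ADD` forms, the floating-point variants lower to `FADD` and go through `FPAdd` with the current `FPCR` state. From Rust this is reachable through the NEON intrinsics plus the run-time detection added by this patch; a hedged sketch, assuming the ACLE name `vadd_f32` for the `Vd.2S` form (the intrinsic names are not visible in this fragment) and the current `std::arch` macro path:

// Guarded use of FADD Vd.2S. float32x2_t/vadd_f32 are the ACLE names and are
// assumed here; the detection macro lived behind `stdsimd` when this landed.
#[cfg(target_arch = "aarch64")]
fn add_pairs(a: [f32; 2], b: [f32; 2]) -> [f32; 2] {
    if std::arch::is_aarch64_feature_detected!("neon") {
        unsafe {
            use std::arch::aarch64::*;
            let va: float32x2_t = core::mem::transmute(a);
            let vb: float32x2_t = core::mem::transmute(b);
            core::mem::transmute(vadd_f32(va, vb)) // lowers to FADD Vd.2S
        }
    } else {
        [a[0] + b[0], a[1] + b[1]]
    }
}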

Description

Floating-point Add (vector). This instruction adds corresponding vector elements in the two source SIMD&FP registers, writes the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.

+

A64 Instruction

FADD Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+    Elem[result, e, esize] = FPAdd(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Floating-point Add (vector). This instruction adds corresponding vector elements in the two source SIMD&FP registers, writes the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.

+

A64 Instruction

FADD Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+    Elem[result, e, esize] = FPAdd(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Add (vector). This instruction adds corresponding vector elements in the two source SIMD&FP registers, writes the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.

+

A64 Instruction

FADD Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+    Elem[result, e, esize] = FPAdd(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Add (vector). This instruction adds corresponding elements in the two source SIMD&FP registers, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

ADD Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then
+        Elem[result, e, esize] = element1 - element2;
+    else
+        Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

A64
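This form adds a single 64-bit element held in a `D` register and, as the block notes, exists only on A64. A minimal sketch, assuming the ACLE name `vaddd_s64` (not shown in this fragment):

// Scalar 64-bit add in a SIMD D register; should lower to ADD Dd,Dn,Dm.
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn d_register_add(a: i64, b: i64) -> i64 {
    use std::arch::aarch64::vaddd_s64; // name assumed from ACLE
    vaddd_s64(a, b)
}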

Description

Add (vector). This instruction adds corresponding elements in the two source SIMD&FP registers, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

ADD Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then
+        Elem[result, e, esize] = element1 - element2;
+    else
+        Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Add Long (vector). This instruction adds each vector element in the lower or upper half of the first source SIMD&FP register to the corresponding vector element of the second source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are signed integer values.

+

A64 Instruction

SADDL Vd.8H,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    Elem[result, e, 2*esize] = sum<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
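The long form widens as it adds: eight `i8` lanes from each source yield eight `i16` lanes, so the sum cannot wrap. In Rust terms, assuming the ACLE name `vaddl_s8`:

// Widening add: int8x8_t + int8x8_t -> int16x8_t, lowering to SADDL Vd.8H.
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn widening_add(a: [i8; 8], b: [i8; 8]) -> [i16; 8] {
    use std::arch::aarch64::*;
    let wide = vaddl_s8(core::mem::transmute(a), core::mem::transmute(b));
    core::mem::transmute(wide) // int16x8_t and [i16; 8] share a layout
}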

Description

Signed Add Long (vector). This instruction adds each vector element in the lower or upper half of the first source SIMD&FP register to the corresponding vector element of the second source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are signed integer values.

+

A64 Instruction

SADDL Vd.4S,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    Elem[result, e, 2*esize] = sum<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Add Long (vector). This instruction adds each vector element in the lower or upper half of the first source SIMD&FP register to the corresponding vector element of the second source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are signed integer values.

+

A64 Instruction

SADDL Vd.2D,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    Elem[result, e, 2*esize] = sum<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Add Long (vector). This instruction adds each vector element in the lower or upper half of the first source SIMD&FP register to the corresponding vector element of the second source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UADDL Vd.8H,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    Elem[result, e, 2*esize] = sum<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
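The unsigned long form is the usual way to add bytes whose sum may exceed 255, since both operands are widened to 16 bits before the addition. A sketch, again assuming the ACLE name (`vaddl_u8`):

// u8 + u8 can reach 510, so widen to u16 lanes while adding (UADDL Vd.8H).
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn add_bytes_widening(a: [u8; 8], b: [u8; 8]) -> [u16; 8] {
    use std::arch::aarch64::*;
    // e.g. lanes holding 200 and 200 produce 400, which fits in the u16 lane.
    let wide: uint16x8_t = vaddl_u8(core::mem::transmute(a), core::mem::transmute(b));
    core::mem::transmute(wide)
}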

Description

Unsigned Add Long (vector). This instruction adds each vector element in the lower or upper half of the first source SIMD&FP register to the corresponding vector element of the second source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UADDL Vd.4S,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    Elem[result, e, 2*esize] = sum<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Add Long (vector). This instruction adds each vector element in the lower or upper half of the first source SIMD&FP register to the corresponding vector element of the second source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UADDL Vd.2D,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    Elem[result, e, 2*esize] = sum<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Add Long (vector). This instruction adds each vector element in the lower or upper half of the first source SIMD&FP register to the corresponding vector element of the second source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are signed integer values.

+

A64 Instruction

SADDL2 Vd.8H,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    Elem[result, e, 2*esize] = sum<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64
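The `2` suffix means the instruction reads the upper halves of two 128-bit registers, so pairing it with the non-`2` form covers all sixteen lanes of a `Vn.16B` input. A sketch under the assumption that the ACLE names `vaddl_s8`, `vaddl_high_s8` and `vget_low_s8` are available:

// Widen all 16 byte lanes of two int8x16_t vectors: SADDL handles the low
// half, SADDL2 the high half.
#[cfg(target_arch = "aarch64")]
mod widen_all {
    use std::arch::aarch64::*;

    #[target_feature(enable = "neon")]
    pub unsafe fn widen_all_lanes(a: int8x16_t, b: int8x16_t) -> (int16x8_t, int16x8_t) {
        let low = vaddl_s8(vget_low_s8(a), vget_low_s8(b)); // lanes 0..7  (SADDL)
        let high = vaddl_high_s8(a, b);                     // lanes 8..15 (SADDL2)
        (low, high)
    }
}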

Description

Signed Add Long (vector). This instruction adds each vector element in the lower or upper half of the first source SIMD&FP register to the corresponding vector element of the second source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are signed integer values.

+

A64 Instruction

SADDL2 Vd.4S,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    Elem[result, e, 2*esize] = sum<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Add Long (vector). This instruction adds each vector element in the lower or upper half of the first source SIMD&FP register to the corresponding vector element of the second source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are signed integer values.

+

A64 Instruction

SADDL2 Vd.2D,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    Elem[result, e, 2*esize] = sum<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Add Long (vector). This instruction adds each vector element in the lower or upper half of the first source SIMD&FP register to the corresponding vector element of the second source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UADDL2 Vd.8H,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    Elem[result, e, 2*esize] = sum<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Add Long (vector). This instruction adds each vector element in the lower or upper half of the first source SIMD&FP register to the corresponding vector element of the second source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UADDL2 Vd.4S,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    Elem[result, e, 2*esize] = sum<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Add Long (vector). This instruction adds each vector element in the lower or upper half of the first source SIMD&FP register to the corresponding vector element of the second source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UADDL2 Vd.2D,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    Elem[result, e, 2*esize] = sum<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Add Wide. This instruction adds vector elements of the first source SIMD&FP register to the corresponding vector elements in the lower or upper half of the second source SIMD&FP register, places the results in a vector, and writes the vector to the SIMD&FP destination register.

+

A64 Instruction

SADDW Vd.8H,Vn.8H,Vm.8B
+

Argument Preparation

a → Vn.8H 
+b → Vm.8B

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, 2*esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    Elem[result, e, 2*esize] = sum<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
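The wide form differs from the long form in that the first operand is already 16-bit, which makes it natural for accumulating narrow data into a wider running total. A sketch with the `vaddw_s8` and `vdupq_n_s16` names assumed from ACLE; the accumulator lanes will still wrap for very long inputs:

// Accumulate eight i8 lanes per chunk into an i16x8 accumulator (SADDW Vd.8H).
#[cfg(target_arch = "aarch64")]
mod accumulate {
    use std::arch::aarch64::*;

    #[target_feature(enable = "neon")]
    pub unsafe fn sum_chunks(chunks: &[[i8; 8]]) -> int16x8_t {
        let mut acc = vdupq_n_s16(0); // start every lane at zero
        for chunk in chunks {
            let narrow: int8x8_t = core::mem::transmute(*chunk);
            acc = vaddw_s8(acc, narrow); // widen-add the i8 lanes into acc
        }
        acc
    }
}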

Description

Signed Add Wide. This instruction adds vector elements of the first source SIMD&FP register to the corresponding vector elements in the lower or upper half of the second source SIMD&FP register, places the results in a vector, and writes the vector to the SIMD&FP destination register.

+

A64 Instruction

SADDW Vd.4S,Vn.4S,Vm.4H
+

Argument Preparation

a → Vn.4S 
+b → Vm.4H

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, 2*esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    Elem[result, e, 2*esize] = sum<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Add Wide. This instruction adds vector elements of the first source SIMD&FP register to the corresponding vector elements in the lower or upper half of the second source SIMD&FP register, places the results in a vector, and writes the vector to the SIMD&FP destination register.

+

A64 Instruction

SADDW Vd.2D,Vn.2D,Vm.2S
+

Argument Preparation

a → Vn.2D 
+b → Vm.2S

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, 2*esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    Elem[result, e, 2*esize] = sum<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Add Wide. This instruction adds the vector elements of the first source SIMD&FP register to the corresponding vector elements in the lower or upper half of the second source SIMD&FP register, places the result in a vector, and writes the vector to the SIMD&FP destination register. The vector elements of the destination register and the first source register are twice as long as the vector elements of the second source register. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UADDW Vd.8H,Vn.8H,Vm.8B
+

Argument Preparation

a → Vn.8H 
+b → Vm.8B

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, 2*esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    Elem[result, e, 2*esize] = sum<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
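For the unsigned case the same split applies: `UADDL` widens both 8-bit operands, while `UADDW` adds 8-bit lanes into an accumulator that is already 16-bit. A small side-by-side, with the intrinsic names assumed as before:

// Same result type, differently shaped inputs.
#[cfg(target_arch = "aarch64")]
mod unsigned_widen {
    use std::arch::aarch64::*;

    #[target_feature(enable = "neon")]
    pub unsafe fn both_forms(acc: uint16x8_t, a: uint8x8_t, b: uint8x8_t) -> (uint16x8_t, uint16x8_t) {
        (vaddl_u8(a, b),   // UADDL: widen both operands, then add
         vaddw_u8(acc, a)) // UADDW: add narrow lanes into the wide accumulator
    }
}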

Description

Unsigned Add Wide. This instruction adds the vector elements of the first source SIMD&FP register to the corresponding vector elements in the lower or upper half of the second source SIMD&FP register, places the result in a vector, and writes the vector to the SIMD&FP destination register. The vector elements of the destination register and the first source register are twice as long as the vector elements of the second source register. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UADDW Vd.4S,Vn.4S,Vm.4H
+

Argument Preparation

a → Vn.4S 
+b → Vm.4H

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, 2*esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    Elem[result, e, 2*esize] = sum<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Add Wide. This instruction adds the vector elements of the first source SIMD&FP register to the corresponding vector elements in the lower or upper half of the second source SIMD&FP register, places the result in a vector, and writes the vector to the SIMD&FP destination register. The vector elements of the destination register and the first source register are twice as long as the vector elements of the second source register. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UADDW Vd.2D,Vn.2D,Vm.2S
+

Argument Preparation

a → Vn.2D 
+b → Vm.2S

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, 2*esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    Elem[result, e, 2*esize] = sum<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Add Wide. This instruction adds vector elements of the first source SIMD&FP register to the corresponding vector elements in the lower or upper half of the second source SIMD&FP register, places the results in a vector, and writes the vector to the SIMD&FP destination register.

+

A64 Instruction

SADDW2 Vd.8H,Vn.8H,Vm.16B
+

Argument Preparation

a → Vn.8H 
+b → Vm.16B

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, 2*esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    Elem[result, e, 2*esize] = sum<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Add Wide. This instruction adds vector elements of the first source SIMD&FP register to the corresponding vector elements in the lower or upper half of the second source SIMD&FP register, places the results in a vector, and writes the vector to the SIMD&FP destination register.

+

A64 Instruction

SADDW2 Vd.4S,Vn.4S,Vm.8H
+

Argument Preparation

a → Vn.4S 
+b → Vm.8H

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, 2*esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    Elem[result, e, 2*esize] = sum<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Add Wide. This instruction adds vector elements of the first source SIMD&FP register to the corresponding vector elements in the lower or upper half of the second source SIMD&FP register, places the results in a vector, and writes the vector to the SIMD&FP destination register.

+

A64 Instruction

SADDW2 Vd.2D,Vn.2D,Vm.4S
+

Argument Preparation

a → Vn.2D 
+b → Vm.4S

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, 2*esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    Elem[result, e, 2*esize] = sum<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Add Wide. This instruction adds the vector elements of the first source SIMD&FP register to the corresponding vector elements in the lower or upper half of the second source SIMD&FP register, places the result in a vector, and writes the vector to the SIMD&FP destination register. The vector elements of the destination register and the first source register are twice as long as the vector elements of the second source register. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UADDW2 Vd.8H,Vn.8H,Vm.16B
+

Argument Preparation

a → Vn.8H 
+b → Vm.16B

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, 2*esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    Elem[result, e, 2*esize] = sum<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Add Wide. This instruction adds the vector elements of the first source SIMD&FP register to the corresponding vector elements in the lower or upper half of the second source SIMD&FP register, places the result in a vector, and writes the vector to the SIMD&FP destination register. The vector elements of the destination register and the first source register are twice as long as the vector elements of the second source register. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UADDW2 Vd.4S,Vn.4S,Vm.8H
+

Argument Preparation

a → Vn.4S 
+b → Vm.8H

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, 2*esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    Elem[result, e, 2*esize] = sum<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Add Wide. This instruction adds the vector elements of the first source SIMD&FP register to the corresponding vector elements in the lower or upper half of the second source SIMD&FP register, places the result in a vector, and writes the vector to the SIMD&FP destination register. The vector elements of the destination register and the first source register are twice as long as the vector elements of the second source register. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UADDW2 Vd.2D,Vn.2D,Vm.4S
+

Argument Preparation

a → Vn.2D 
+b → Vm.4S

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, 2*esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    Elem[result, e, 2*esize] = sum<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Halving Add. This instruction adds corresponding signed integer values from the two source SIMD&FP registers, shifts each result right one bit, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SHADD Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    Elem[result, e, esize] = sum<esize:1>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
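Halving add computes the lane-wise `(a + b) >> 1` in a double-width intermediate, so it cannot overflow, and for signed inputs the shift truncates toward negative infinity — that is what `sum<esize:1>` in the pseudocode expresses. A scalar model next to the intrinsic call (the `vhadd_s8` name is assumed):

// SHADD: lane-wise (a + b) >> 1 without intermediate overflow.
fn shadd_scalar(a: i8, b: i8) -> i8 {
    ((a as i16 + b as i16) >> 1) as i8 // matches sum<esize:1> above
}

#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn shadd_vector(a: [i8; 8], b: [i8; 8]) -> [i8; 8] {
    use std::arch::aarch64::*;
    core::mem::transmute(vhadd_s8(core::mem::transmute(a), core::mem::transmute(b)))
}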

Description

Signed Halving Add. This instruction adds corresponding signed integer values from the two source SIMD&FP registers, shifts each result right one bit, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SHADD Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    Elem[result, e, esize] = sum<esize:1>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Halving Add. This instruction adds corresponding signed integer values from the two source SIMD&FP registers, shifts each result right one bit, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SHADD Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    Elem[result, e, esize] = sum<esize:1>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Halving Add. This instruction adds corresponding signed integer values from the two source SIMD&FP registers, shifts each result right one bit, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SHADD Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    Elem[result, e, esize] = sum<esize:1>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Halving Add. This instruction adds corresponding signed integer values from the two source SIMD&FP registers, shifts each result right one bit, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SHADD Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    Elem[result, e, esize] = sum<esize:1>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Halving Add. This instruction adds corresponding signed integer values from the two source SIMD&FP registers, shifts each result right one bit, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SHADD Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    Elem[result, e, esize] = sum<esize:1>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Halving Add. This instruction adds corresponding unsigned integer values from the two source SIMD&FP registers, shifts each result right one bit, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UHADD Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    Elem[result, e, esize] = sum<esize:1>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Halving Add. This instruction adds corresponding unsigned integer values from the two source SIMD&FP registers, shifts each result right one bit, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UHADD Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    Elem[result, e, esize] = sum<esize:1>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Halving Add. This instruction adds corresponding unsigned integer values from the two source SIMD&FP registers, shifts each result right one bit, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UHADD Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    Elem[result, e, esize] = sum<esize:1>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Halving Add. This instruction adds corresponding unsigned integer values from the two source SIMD&FP registers, shifts each result right one bit, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UHADD Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    Elem[result, e, esize] = sum<esize:1>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Halving Add. This instruction adds corresponding unsigned integer values from the two source SIMD&FP registers, shifts each result right one bit, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UHADD Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    Elem[result, e, esize] = sum<esize:1>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Halving Add. This instruction adds corresponding unsigned integer values from the two source SIMD&FP registers, shifts each result right one bit, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UHADD Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    Elem[result, e, esize] = sum<esize:1>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Rounding Halving Add. This instruction adds corresponding signed integer values from the two source SIMD&FP registers, shifts each result right one bit, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SRHADD Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    Elem[result, e, esize] = (element1+element2+1)<esize:1>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
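
A scalar sketch of the rounding variant (illustrative, not the intrinsic): the +1 before the shift is what distinguishes SRHADD/URHADD from SHADD/UHADD and matches (element1+element2+1)<esize:1> in the pseudocode.

// Signed rounding halving add on one lane: halves round upward.
fn srhadd_sketch(a: i8, b: i8) -> i8 {
    ((i16::from(a) + i16::from(b) + 1) >> 1) as i8
}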

Description

Signed Rounding Halving Add. This instruction adds corresponding signed integer values from the two source SIMD&FP registers, shifts each result right one bit, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SRHADD Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    Elem[result, e, esize] = (element1+element2+1)<esize:1>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Rounding Halving Add. This instruction adds corresponding signed integer values from the two source SIMD&FP registers, shifts each result right one bit, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SRHADD Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    Elem[result, e, esize] = (element1+element2+1)<esize:1>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Rounding Halving Add. This instruction adds corresponding signed integer values from the two source SIMD&FP registers, shifts each result right one bit, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SRHADD Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    Elem[result, e, esize] = (element1+element2+1)<esize:1>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Rounding Halving Add. This instruction adds corresponding signed integer values from the two source SIMD&FP registers, shifts each result right one bit, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SRHADD Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    Elem[result, e, esize] = (element1+element2+1)<esize:1>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Rounding Halving Add. This instruction adds corresponding signed integer values from the two source SIMD&FP registers, shifts each result right one bit, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SRHADD Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    Elem[result, e, esize] = (element1+element2+1)<esize:1>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Rounding Halving Add. This instruction adds corresponding unsigned integer values from the two source SIMD&FP registers, shifts each result right one bit, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

URHADD Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    Elem[result, e, esize] = (element1+element2+1)<esize:1>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Rounding Halving Add. This instruction adds corresponding unsigned integer values from the two source SIMD&FP registers, shifts each result right one bit, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

URHADD Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    Elem[result, e, esize] = (element1+element2+1)<esize:1>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Rounding Halving Add. This instruction adds corresponding unsigned integer values from the two source SIMD&FP registers, shifts each result right one bit, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

URHADD Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    Elem[result, e, esize] = (element1+element2+1)<esize:1>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Rounding Halving Add. This instruction adds corresponding unsigned integer values from the two source SIMD&FP registers, shifts each result right one bit, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

URHADD Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    Elem[result, e, esize] = (element1+element2+1)<esize:1>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Rounding Halving Add. This instruction adds corresponding unsigned integer values from the two source SIMD&FP registers, shifts each result right one bit, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

URHADD Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    Elem[result, e, esize] = (element1+element2+1)<esize:1>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Rounding Halving Add. This instruction adds corresponding unsigned integer values from the two source SIMD&FP registers, shifts each result right one bit, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

URHADD Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    Elem[result, e, esize] = (element1+element2+1)<esize:1>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Add. This instruction adds the values of corresponding elements of the two source SIMD&FP registers, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQADD Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    (Elem[result, e, esize], sat) = SatQ(sum, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
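
On a single lane, the saturating behaviour can be sketched with Rust's built-in saturating arithmetic (this models SatQ only; it does not set the FPSR.QC flag that the pseudocode records on saturation):

// Signed saturating add on one lane: the sum is clamped to [-128, 127].
fn sqadd_sketch(a: i8, b: i8) -> i8 {
    a.saturating_add(b)
}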

Description

Signed saturating Add. This instruction adds the values of corresponding elements of the two source SIMD&FP registers, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQADD Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    (Elem[result, e, esize], sat) = SatQ(sum, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Add. This instruction adds the values of corresponding elements of the two source SIMD&FP registers, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQADD Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    (Elem[result, e, esize], sat) = SatQ(sum, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Add. This instruction adds the values of corresponding elements of the two source SIMD&FP registers, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQADD Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    (Elem[result, e, esize], sat) = SatQ(sum, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Add. This instruction adds the values of corresponding elements of the two source SIMD&FP registers, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQADD Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    (Elem[result, e, esize], sat) = SatQ(sum, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Add. This instruction adds the values of corresponding elements of the two source SIMD&FP registers, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQADD Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    (Elem[result, e, esize], sat) = SatQ(sum, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Add. This instruction adds the values of corresponding elements of the two source SIMD&FP registers, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQADD Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    (Elem[result, e, esize], sat) = SatQ(sum, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Add. This instruction adds the values of corresponding elements of the two source SIMD&FP registers, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQADD Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    (Elem[result, e, esize], sat) = SatQ(sum, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned saturating Add. This instruction adds the values of corresponding elements of the two source SIMD&FP registers, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQADD Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    (Elem[result, e, esize], sat) = SatQ(sum, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
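
The unsigned form saturates at the top of the element range instead; a small sketch plus a check of the clamping behaviour (again, FPSR.QC is not modelled):

// Unsigned saturating add on one lane: the sum is clamped to [0, 255].
fn uqadd_sketch(a: u8, b: u8) -> u8 {
    a.saturating_add(b)
}

#[test]
fn uqadd_sketch_saturates() {
    assert_eq!(uqadd_sketch(200, 100), 255); // 300 clamps to the u8 maximum
}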

Description

Unsigned saturating Add. This instruction adds the values of corresponding elements of the two source SIMD&FP registers, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQADD Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    (Elem[result, e, esize], sat) = SatQ(sum, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned saturating Add. This instruction adds the values of corresponding elements of the two source SIMD&FP registers, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQADD Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    (Elem[result, e, esize], sat) = SatQ(sum, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned saturating Add. This instruction adds the values of corresponding elements of the two source SIMD&FP registers, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQADD Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    (Elem[result, e, esize], sat) = SatQ(sum, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned saturating Add. This instruction adds the values of corresponding elements of the two source SIMD&FP registers, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQADD Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    (Elem[result, e, esize], sat) = SatQ(sum, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned saturating Add. This instruction adds the values of corresponding elements of the two source SIMD&FP registers, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQADD Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    (Elem[result, e, esize], sat) = SatQ(sum, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned saturating Add. This instruction adds the values of corresponding elements of the two source SIMD&FP registers, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQADD Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    (Elem[result, e, esize], sat) = SatQ(sum, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned saturating Add. This instruction adds the values of corresponding elements of the two source SIMD&FP registers, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQADD Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    (Elem[result, e, esize], sat) = SatQ(sum, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Add. This instruction adds the values of corresponding elements of the two source SIMD&FP registers, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQADD Bd,Bn,Bm
+

Argument Preparation

a → Bn 
+b → Bm

Results

Bd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    (Elem[result, e, esize], sat) = SatQ(sum, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Add. This instruction adds the values of corresponding elements of the two source SIMD&FP registers, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQADD Hd,Hn,Hm
+

Argument Preparation

a → Hn 
+b → Hm

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    (Elem[result, e, esize], sat) = SatQ(sum, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Add. This instruction adds the values of corresponding elements of the two source SIMD&FP registers, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQADD Sd,Sn,Sm
+

Argument Preparation

a → Sn 
+b → Sm

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    (Elem[result, e, esize], sat) = SatQ(sum, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Add. This instruction adds the values of corresponding elements of the two source SIMD&FP registers, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQADD Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    (Elem[result, e, esize], sat) = SatQ(sum, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned saturating Add. This instruction adds the values of corresponding elements of the two source SIMD&FP registers, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQADD Bd,Bn,Bm
+

Argument Preparation

a → Bn 
+b → Bm

Results

Bd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    (Elem[result, e, esize], sat) = SatQ(sum, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned saturating Add. This instruction adds the values of corresponding elements of the two source SIMD&FP registers, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQADD Hd,Hn,Hm
+

Argument Preparation

a → Hn 
+b → Hm

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    (Elem[result, e, esize], sat) = SatQ(sum, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned saturating Add. This instruction adds the values of corresponding elements of the two source SIMD&FP registers, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQADD Sd,Sn,Sm
+

Argument Preparation

a → Sn 
+b → Sm

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    (Elem[result, e, esize], sat) = SatQ(sum, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned saturating Add. This instruction adds the values of corresponding elements of the two source SIMD&FP registers, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQADD Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer sum;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    sum = element1 + element2;
+    (Elem[result, e, esize], sat) = SatQ(sum, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Accumulate of Unsigned value. This instruction adds the unsigned integer values of the vector elements in the source SIMD&FP register to corresponding signed integer values of the vector elements in the destination SIMD&FP register, and writes the resulting signed integer values to the destination SIMD&FP register.

+

A64 Instruction

SUQADD Vd.8B,Vn.8B
+

Argument Preparation

a → Vd.8B 
+b → Vn.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+bits(datasize) operand2 = V[d];
+integer op1;
+integer op2;
+boolean sat;
+
+for e = 0 to elements-1
+    op1 = Int(Elem[operand, e, esize], !unsigned);
+    op2 = Int(Elem[operand2, e, esize], unsigned);
+    (Elem[result, e, esize], sat) = SatQ(op1 + op2, esize, unsigned);
+    if sat then FPSR.QC = '1';
+V[d] = result;
+

Supported architectures

A64
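
Per lane, SUQADD adds an unsigned source element to a signed accumulator element and saturates the result to the signed range; a hedged scalar sketch (hypothetical helper, reading the pseudocode above with `unsigned` = FALSE for this signed-result form):

// Signed saturating accumulate of an unsigned value on one lane.
fn suqadd_sketch(acc: i8, x: u8) -> i8 {
    let sum = i16::from(acc) + i16::from(x); // exact in i16
    // Only the upper bound can actually be reached, but clamp both ends for clarity.
    sum.clamp(i16::from(i8::MIN), i16::from(i8::MAX)) as i8
}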

Description

Signed saturating Accumulate of Unsigned value. This instruction adds the unsigned integer values of the vector elements in the source SIMD&FP register to corresponding signed integer values of the vector elements in the destination SIMD&FP register, and writes the resulting signed integer values to the destination SIMD&FP register.

+

A64 Instruction

SUQADD Vd.16B,Vn.16B
+

Argument Preparation

a → Vd.16B 
+b → Vn.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+bits(datasize) operand2 = V[d];
+integer op1;
+integer op2;
+boolean sat;
+
+for e = 0 to elements-1
+    op1 = Int(Elem[operand, e, esize], !unsigned);
+    op2 = Int(Elem[operand2, e, esize], unsigned);
+    (Elem[result, e, esize], sat) = SatQ(op1 + op2, esize, unsigned);
+    if sat then FPSR.QC = '1';
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Accumulate of Unsigned value. This instruction adds the unsigned integer values of the vector elements in the source SIMD&FP register to corresponding signed integer values of the vector elements in the destination SIMD&FP register, and writes the resulting signed integer values to the destination SIMD&FP register.

+

A64 Instruction

SUQADD Vd.4H,Vn.4H
+

Argument Preparation

a → Vd.4H 
+b → Vn.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+bits(datasize) operand2 = V[d];
+integer op1;
+integer op2;
+boolean sat;
+
+for e = 0 to elements-1
+    op1 = Int(Elem[operand, e, esize], !unsigned);
+    op2 = Int(Elem[operand2, e, esize], unsigned);
+    (Elem[result, e, esize], sat) = SatQ(op1 + op2, esize, unsigned);
+    if sat then FPSR.QC = '1';
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Accumulate of Unsigned value. This instruction adds the unsigned integer values of the vector elements in the source SIMD&FP register to corresponding signed integer values of the vector elements in the destination SIMD&FP register, and writes the resulting signed integer values to the destination SIMD&FP register.

+

A64 Instruction

SUQADD Vd.8H,Vn.8H
+

Argument Preparation

a → Vd.8H 
+b → Vn.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+bits(datasize) operand2 = V[d];
+integer op1;
+integer op2;
+boolean sat;
+
+for e = 0 to elements-1
+    op1 = Int(Elem[operand, e, esize], !unsigned);
+    op2 = Int(Elem[operand2, e, esize], unsigned);
+    (Elem[result, e, esize], sat) = SatQ(op1 + op2, esize, unsigned);
+    if sat then FPSR.QC = '1';
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Accumulate of Unsigned value. This instruction adds the unsigned integer values of the vector elements in the source SIMD&FP register to corresponding signed integer values of the vector elements in the destination SIMD&FP register, and writes the resulting signed integer values to the destination SIMD&FP register.

+

A64 Instruction

SUQADD Vd.2S,Vn.2S
+

Argument Preparation

a → Vd.2S 
+b → Vn.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+bits(datasize) operand2 = V[d];
+integer op1;
+integer op2;
+boolean sat;
+
+for e = 0 to elements-1
+    op1 = Int(Elem[operand, e, esize], !unsigned);
+    op2 = Int(Elem[operand2, e, esize], unsigned);
+    (Elem[result, e, esize], sat) = SatQ(op1 + op2, esize, unsigned);
+    if sat then FPSR.QC = '1';
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Accumulate of Unsigned value. This instruction adds the unsigned integer values of the vector elements in the source SIMD&FP register to corresponding signed integer values of the vector elements in the destination SIMD&FP register, and writes the resulting signed integer values to the destination SIMD&FP register.

+

A64 Instruction

SUQADD Vd.4S,Vn.4S
+

Argument Preparation

a → Vd.4S 
+b → Vn.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+bits(datasize) operand2 = V[d];
+integer op1;
+integer op2;
+boolean sat;
+
+for e = 0 to elements-1
+    op1 = Int(Elem[operand, e, esize], !unsigned);
+    op2 = Int(Elem[operand2, e, esize], unsigned);
+    (Elem[result, e, esize], sat) = SatQ(op1 + op2, esize, unsigned);
+    if sat then FPSR.QC = '1';
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Accumulate of Unsigned value. This instruction adds the unsigned integer values of the vector elements in the source SIMD&FP register to corresponding signed integer values of the vector elements in the destination SIMD&FP register, and writes the resulting signed integer values to the destination SIMD&FP register.

+

A64 Instruction

SUQADD Dd,Dn
+

Argument Preparation

a → Dd 
+b → Dn

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+bits(datasize) operand2 = V[d];
+integer op1;
+integer op2;
+boolean sat;
+
+for e = 0 to elements-1
+    op1 = Int(Elem[operand, e, esize], !unsigned);
+    op2 = Int(Elem[operand2, e, esize], unsigned);
+    (Elem[result, e, esize], sat) = SatQ(op1 + op2, esize, unsigned);
+    if sat then FPSR.QC = '1';
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Accumulate of Unsigned value. This instruction adds the unsigned integer values of the vector elements in the source SIMD&FP register to corresponding signed integer values of the vector elements in the destination SIMD&FP register, and writes the resulting signed integer values to the destination SIMD&FP register.

+

A64 Instruction

SUQADD Vd.2D,Vn.2D
+

Argument Preparation

a → Vd.2D 
+b → Vn.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+bits(datasize) operand2 = V[d];
+integer op1;
+integer op2;
+boolean sat;
+
+for e = 0 to elements-1
+    op1 = Int(Elem[operand, e, esize], !unsigned);
+    op2 = Int(Elem[operand2, e, esize], unsigned);
+    (Elem[result, e, esize], sat) = SatQ(op1 + op2, esize, unsigned);
+    if sat then FPSR.QC = '1';
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Accumulate of Unsigned value. This instruction adds the unsigned integer values of the vector elements in the source SIMD&FP register to corresponding signed integer values of the vector elements in the destination SIMD&FP register, and writes the resulting signed integer values to the destination SIMD&FP register.

+

A64 Instruction

SUQADD Bd,Bn
+

Argument Preparation

a → Bd 
+b → Bn

Results

Bd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+bits(datasize) operand2 = V[d];
+integer op1;
+integer op2;
+boolean sat;
+
+for e = 0 to elements-1
+    op1 = Int(Elem[operand, e, esize], !unsigned);
+    op2 = Int(Elem[operand2, e, esize], unsigned);
+    (Elem[result, e, esize], sat) = SatQ(op1 + op2, esize, unsigned);
+    if sat then FPSR.QC = '1';
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Accumulate of Unsigned value. This instruction adds the unsigned integer values of the vector elements in the source SIMD&FP register to corresponding signed integer values of the vector elements in the destination SIMD&FP register, and writes the resulting signed integer values to the destination SIMD&FP register.

+

A64 Instruction

SUQADD Hd,Hn
+

Argument Preparation

a → Hd 
+b → Hn

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+bits(datasize) operand2 = V[d];
+integer op1;
+integer op2;
+boolean sat;
+
+for e = 0 to elements-1
+    op1 = Int(Elem[operand, e, esize], !unsigned);
+    op2 = Int(Elem[operand2, e, esize], unsigned);
+    (Elem[result, e, esize], sat) = SatQ(op1 + op2, esize, unsigned);
+    if sat then FPSR.QC = '1';
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Accumulate of Unsigned value. This instruction adds the unsigned integer values of the vector elements in the source SIMD&FP register to corresponding signed integer values of the vector elements in the destination SIMD&FP register, and writes the resulting signed integer values to the destination SIMD&FP register.

+

A64 Instruction

SUQADD Sd,Sn
+

Argument Preparation

a → Sd 
+b → Sn

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+bits(datasize) operand2 = V[d];
+integer op1;
+integer op2;
+boolean sat;
+
+for e = 0 to elements-1
+    op1 = Int(Elem[operand, e, esize], !unsigned);
+    op2 = Int(Elem[operand2, e, esize], unsigned);
+    (Elem[result, e, esize], sat) = SatQ(op1 + op2, esize, unsigned);
+    if sat then FPSR.QC = '1';
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Accumulate of Unsigned value. This instruction adds the unsigned integer values of the vector elements in the source SIMD&FP register to corresponding signed integer values of the vector elements in the destination SIMD&FP register, and writes the resulting signed integer values to the destination SIMD&FP register.

+

A64 Instruction

SUQADD Dd,Dn
+

Argument Preparation

a → Dd 
+b → Dn

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+bits(datasize) operand2 = V[d];
+integer op1;
+integer op2;
+boolean sat;
+
+for e = 0 to elements-1
+    op1 = Int(Elem[operand, e, esize], !unsigned);
+    op2 = Int(Elem[operand2, e, esize], unsigned);
+    (Elem[result, e, esize], sat) = SatQ(op1 + op2, esize, unsigned);
+    if sat then FPSR.QC = '1';
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned saturating Accumulate of Signed value. This instruction adds the signed integer values of the vector elements in the source SIMD&FP register to corresponding unsigned integer values of the vector elements in the destination SIMD&FP register, and writes the resulting unsigned integer values to the corresponding vector elements of the destination SIMD&FP register.

+

A64 Instruction

USQADD Vd.8B,Vn.8B
+

Argument Preparation

a → Vd.8B 
+b → Vn.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+bits(datasize) operand2 = V[d];
+integer op1;
+integer op2;
+boolean sat;
+
+for e = 0 to elements-1
+    op1 = Int(Elem[operand, e, esize], !unsigned);
+    op2 = Int(Elem[operand2, e, esize], unsigned);
+    (Elem[result, e, esize], sat) = SatQ(op1 + op2, esize, unsigned);
+    if sat then FPSR.QC = '1';
+V[d] = result;
+

Supported architectures

A64
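
Per lane, USQADD is the mirror image: a signed source element is added to an unsigned accumulator element and the result saturates to the unsigned range. A scalar sketch (illustrative only):

// Unsigned saturating accumulate of a signed value on one lane.
fn usqadd_sketch(acc: u8, x: i8) -> u8 {
    let sum = i16::from(acc) + i16::from(x); // exact in i16
    sum.clamp(0, i16::from(u8::MAX)) as u8   // clamp to [0, 255]
}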

Description

Unsigned saturating Accumulate of Signed value. This instruction adds the signed integer values of the vector elements in the source SIMD&FP register to corresponding unsigned integer values of the vector elements in the destination SIMD&FP register, and writes the resulting unsigned integer values to the corresponding vector elements of the destination SIMD&FP register.

+

A64 Instruction

USQADD Vd.16B,Vn.16B
+

Argument Preparation

a → Vd.16B 
+b → Vn.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+bits(datasize) operand2 = V[d];
+integer op1;
+integer op2;
+boolean sat;
+
+for e = 0 to elements-1
+    op1 = Int(Elem[operand, e, esize], !unsigned);
+    op2 = Int(Elem[operand2, e, esize], unsigned);
+    (Elem[result, e, esize], sat) = SatQ(op1 + op2, esize, unsigned);
+    if sat then FPSR.QC = '1';
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned saturating Accumulate of Signed value. This instruction adds the signed integer values of the vector elements in the source SIMD&FP register to corresponding unsigned integer values of the vector elements in the destination SIMD&FP register, and writes the resulting unsigned integer values to the corresponding vector elements of the destination SIMD&FP register.

+

A64 Instruction

USQADD Vd.4H,Vn.4H
+

Argument Preparation

a → Vd.4H 
+b → Vn.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+bits(datasize) operand2 = V[d];
+integer op1;
+integer op2;
+boolean sat;
+
+for e = 0 to elements-1
+    op1 = Int(Elem[operand, e, esize], !unsigned);
+    op2 = Int(Elem[operand2, e, esize], unsigned);
+    (Elem[result, e, esize], sat) = SatQ(op1 + op2, esize, unsigned);
+    if sat then FPSR.QC = '1';
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned saturating Accumulate of Signed value. This instruction adds the signed integer values of the vector elements in the source SIMD&FP register to corresponding unsigned integer values of the vector elements in the destination SIMD&FP register, and writes the resulting unsigned integer values to the corresponding vector elements of the destination SIMD&FP register.

+

A64 Instruction

USQADD Vd.8H,Vn.8H
+

Argument Preparation

a → Vd.8H 
+b → Vn.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+bits(datasize) operand2 = V[d];
+integer op1;
+integer op2;
+boolean sat;
+
+for e = 0 to elements-1
+    op1 = Int(Elem[operand, e, esize], !unsigned);
+    op2 = Int(Elem[operand2, e, esize], unsigned);
+    (Elem[result, e, esize], sat) = SatQ(op1 + op2, esize, unsigned);
+    if sat then FPSR.QC = '1';
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned saturating Accumulate of Signed value. This instruction adds the signed integer values of the vector elements in the source SIMD&FP register to corresponding unsigned integer values of the vector elements in the destination SIMD&FP register, and writes the resulting unsigned integer values to the corresponding vector elements of the destination SIMD&FP register.

+

A64 Instruction

USQADD Vd.2S,Vn.2S
+

Argument Preparation

a → Vd.2S 
+b → Vn.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+bits(datasize) operand2 = V[d];
+integer op1;
+integer op2;
+boolean sat;
+
+for e = 0 to elements-1
+    op1 = Int(Elem[operand, e, esize], !unsigned);
+    op2 = Int(Elem[operand2, e, esize], unsigned);
+    (Elem[result, e, esize], sat) = SatQ(op1 + op2, esize, unsigned);
+    if sat then FPSR.QC = '1';
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned saturating Accumulate of Signed value. This instruction adds the signed integer values of the vector elements in the source SIMD&FP register to corresponding unsigned integer values of the vector elements in the destination SIMD&FP register, and writes the resulting unsigned integer values to the corresponding vector elements of the destination SIMD&FP register.

+

A64 Instruction

USQADD Vd.4S,Vn.4S
+

Argument Preparation

a → Vd.4S 
+b → Vn.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+bits(datasize) operand2 = V[d];
+integer op1;
+integer op2;
+boolean sat;
+
+for e = 0 to elements-1
+    op1 = Int(Elem[operand, e, esize], !unsigned);
+    op2 = Int(Elem[operand2, e, esize], unsigned);
+    (Elem[result, e, esize], sat) = SatQ(op1 + op2, esize, unsigned);
+    if sat then FPSR.QC = '1';
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned saturating Accumulate of Signed value. This instruction adds the signed integer values of the vector elements in the source SIMD&FP register to corresponding unsigned integer values of the vector elements in the destination SIMD&FP register, and writes the resulting unsigned integer values to the corresponding vector elements of the destination SIMD&FP register.

+

A64 Instruction

USQADD Dd,Dn
+

Argument Preparation

a → Dd 
+b → Dn

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+bits(datasize) operand2 = V[d];
+integer op1;
+integer op2;
+boolean sat;
+
+for e = 0 to elements-1
+    op1 = Int(Elem[operand, e, esize], !unsigned);
+    op2 = Int(Elem[operand2, e, esize], unsigned);
+    (Elem[result, e, esize], sat) = SatQ(op1 + op2, esize, unsigned);
+    if sat then FPSR.QC = '1';
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned saturating Accumulate of Signed value. This instruction adds the signed integer values of the vector elements in the source SIMD&FP register to corresponding unsigned integer values of the vector elements in the destination SIMD&FP register, and writes the resulting unsigned integer values to the corresponding vector elements of the destination SIMD&FP register.

+

A64 Instruction

USQADD Vd.2D,Vn.2D
+

Argument Preparation

a → Vd.2D 
+b → Vn.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+bits(datasize) operand2 = V[d];
+integer op1;
+integer op2;
+boolean sat;
+
+for e = 0 to elements-1
+    op1 = Int(Elem[operand, e, esize], !unsigned);
+    op2 = Int(Elem[operand2, e, esize], unsigned);
+    (Elem[result, e, esize], sat) = SatQ(op1 + op2, esize, unsigned);
+    if sat then FPSR.QC = '1';
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned saturating Accumulate of Signed value. This instruction adds the signed integer values of the vector elements in the source SIMD&FP register to corresponding unsigned integer values of the vector elements in the destination SIMD&FP register, and writes the resulting unsigned integer values to the corresponding vector elements of the destination SIMD&FP register.

+

A64 Instruction

USQADD Bd,Bn
+

Argument Preparation

a → Bd 
+b → Bn

Results

Bd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+bits(datasize) operand2 = V[d];
+integer op1;
+integer op2;
+boolean sat;
+
+for e = 0 to elements-1
+    op1 = Int(Elem[operand, e, esize], !unsigned);
+    op2 = Int(Elem[operand2, e, esize], unsigned);
+    (Elem[result, e, esize], sat) = SatQ(op1 + op2, esize, unsigned);
+    if sat then FPSR.QC = '1';
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned saturating Accumulate of Signed value. This instruction adds the signed integer values of the vector elements in the source SIMD&FP register to the corresponding unsigned integer values of the vector elements in the destination SIMD&FP register, saturates each result to the unsigned integer range, and writes the resulting unsigned integer values back to the destination SIMD&FP register.

+

A64 Instruction

USQADD Hd,Hn
+

Argument Preparation

a → Hd 
+b → Hn

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+bits(datasize) operand2 = V[d];
+integer op1;
+integer op2;
+boolean sat;
+
+for e = 0 to elements-1
+    op1 = Int(Elem[operand, e, esize], !unsigned);
+    op2 = Int(Elem[operand2, e, esize], unsigned);
+    (Elem[result, e, esize], sat) = SatQ(op1 + op2, esize, unsigned);
+    if sat then FPSR.QC = '1';
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned saturating Accumulate of Signed value. This instruction adds the signed integer values of the vector elements in the source SIMD&FP register to the corresponding unsigned integer values of the vector elements in the destination SIMD&FP register, saturates each result to the unsigned integer range, and writes the resulting unsigned integer values back to the destination SIMD&FP register.

+

A64 Instruction

USQADD Sd,Sn
+

Argument Preparation

a → Sd 
+b → Sn

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+bits(datasize) operand2 = V[d];
+integer op1;
+integer op2;
+boolean sat;
+
+for e = 0 to elements-1
+    op1 = Int(Elem[operand, e, esize], !unsigned);
+    op2 = Int(Elem[operand2, e, esize], unsigned);
+    (Elem[result, e, esize], sat) = SatQ(op1 + op2, esize, unsigned);
+    if sat then FPSR.QC = '1';
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned saturating Accumulate of Signed value. This instruction adds the signed integer values of the vector elements in the source SIMD&FP register to the corresponding unsigned integer values of the vector elements in the destination SIMD&FP register, saturates each result to the unsigned integer range, and writes the resulting unsigned integer values back to the destination SIMD&FP register.

+

A64 Instruction

USQADD Dd,Dn
+

Argument Preparation

a → Dd 
+b → Dn

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+bits(datasize) operand2 = V[d];
+integer op1;
+integer op2;
+boolean sat;
+
+for e = 0 to elements-1
+    op1 = Int(Elem[operand, e, esize], !unsigned);
+    op2 = Int(Elem[operand2, e, esize], unsigned);
+    (Elem[result, e, esize], sat) = SatQ(op1 + op2, esize, unsigned);
+    if sat then FPSR.QC = '1';
+V[d] = result;
+

Supported architectures

A64

Description

Add returning High Narrow. This instruction adds each vector element in the first source SIMD&FP register to the corresponding vector element in the second source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register.

+

A64 Instruction

ADDHN Vd.8B,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(2*datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if round then 1 << (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, 2*esize];
+    element2 = Elem[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    Elem[result, e, esize] = sum<2*esize-1:esize>;
+
+Vpart[d, part] = result;
+
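
As a reading aid for the narrowing step above, the per-lane computation can be sketched in plain Rust. This models a single 16-bit lane of the 8H-to-8B form (esize = 8); the helper name is invented for the example.

// One ADDHN lane: add the two 2*esize-bit elements modulo 2^16 and keep
// only the most significant half, as sum<2*esize-1:esize> does.
fn addhn_lane_u16(a: u16, b: u16) -> u8 {
    (a.wrapping_add(b) >> 8) as u8
}

fn main() {
    // 0x1234 + 0x0F00 = 0x2134 -> high byte 0x21
    assert_eq!(addhn_lane_u16(0x1234, 0x0F00), 0x21);
    // The addition wraps: 0xFFFF + 0x0002 = 0x0001 -> high byte 0x00
    assert_eq!(addhn_lane_u16(0xFFFF, 0x0002), 0x00);
}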

Supported architectures

v7/A32/A64

Description

Add returning High Narrow. This instruction adds each vector element in the first source SIMD&FP register to the corresponding vector element in the second source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register.

+

A64 Instruction

ADDHN Vd.4H,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(2*datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if round then 1 << (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, 2*esize];
+    element2 = Elem[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    Elem[result, e, esize] = sum<2*esize-1:esize>;
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Add returning High Narrow. This instruction adds each vector element in the first source SIMD&FP register to the corresponding vector element in the second source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register.

+

A64 Instruction

ADDHN Vd.2S,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(2*datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if round then 1 << (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, 2*esize];
+    element2 = Elem[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    Elem[result, e, esize] = sum<2*esize-1:esize>;
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Add returning High Narrow. This instruction adds each vector element in the first source SIMD&FP register to the corresponding vector element in the second source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register.

+

A64 Instruction

ADDHN Vd.8B,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(2*datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if round then 1 << (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, 2*esize];
+    element2 = Elem[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    Elem[result, e, esize] = sum<2*esize-1:esize>;
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Add returning High Narrow. This instruction adds each vector element in the first source SIMD&FP register to the corresponding vector element in the second source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register.

+

A64 Instruction

ADDHN Vd.4H,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(2*datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if round then 1 << (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, 2*esize];
+    element2 = Elem[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    Elem[result, e, esize] = sum<2*esize-1:esize>;
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Add returning High Narrow. This instruction adds each vector element in the first source SIMD&FP register to the corresponding vector element in the second source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register.

+

A64 Instruction

ADDHN Vd.2S,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(2*datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if round then 1 << (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, 2*esize];
+    element2 = Elem[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    Elem[result, e, esize] = sum<2*esize-1:esize>;
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Add returning High Narrow. This instruction adds each vector element in the first source SIMD&FP register to the corresponding vector element in the second source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register.

+

A64 Instruction

ADDHN2 Vd.16B,Vn.8H,Vm.8H
+

Argument Preparation

r → Vd.8B 
+a → Vn.8H
+b → Vm.8H

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(2*datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if round then 1 << (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, 2*esize];
+    element2 = Elem[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    Elem[result, e, esize] = sum<2*esize-1:esize>;
+
+Vpart[d, part] = result;
+
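
The "2" form differs from ADDHN only in where the narrowed vector lands: it fills the upper half of the destination while the lower half comes from the narrow vector passed as the first argument (the Vpart[d, part] write). A rough Rust sketch, with invented names and plain arrays standing in for SIMD registers:

// Illustrative model of ADDHN2 for the 16B destination: keep the existing
// low half `r_low` and write the narrowed sums into the high half.
fn addhn2_u16x8(r_low: [u8; 8], a: [u16; 8], b: [u16; 8]) -> [u8; 16] {
    let mut out = [0u8; 16];
    out[..8].copy_from_slice(&r_low);
    for i in 0..8 {
        out[8 + i] = (a[i].wrapping_add(b[i]) >> 8) as u8;
    }
    out
}

fn main() {
    let out = addhn2_u16x8([0; 8], [0x1234; 8], [0x0F00; 8]);
    assert_eq!(out[0], 0x00); // low half preserved
    assert_eq!(out[8], 0x21); // high half holds the narrowed sums
}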

Supported architectures

A64

Description

Add returning High Narrow. This instruction adds each vector element in the first source SIMD&FP register to the corresponding vector element in the second source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register.

+

A64 Instruction

ADDHN2 Vd.8H,Vn.4S,Vm.4S
+

Argument Preparation

r → Vd.4H 
+a → Vn.4S
+b → Vm.4S

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(2*datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if round then 1 << (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, 2*esize];
+    element2 = Elem[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    Elem[result, e, esize] = sum<2*esize-1:esize>;
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Add returning High Narrow. This instruction adds each vector element in the first source SIMD&FP register to the corresponding vector element in the second source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register.

+

A64 Instruction

ADDHN2 Vd.4S,Vn.2D,Vm.2D
+

Argument Preparation

r → Vd.2S 
+a → Vn.2D
+b → Vm.2D

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(2*datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if round then 1 << (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, 2*esize];
+    element2 = Elem[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    Elem[result, e, esize] = sum<2*esize-1:esize>;
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Add returning High Narrow. This instruction adds each vector element in the first source SIMD&FP register to the corresponding vector element in the second source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register.

+

A64 Instruction

ADDHN2 Vd.16B,Vn.8H,Vm.8H
+

Argument Preparation

r → Vd.8B 
+a → Vn.8H
+b → Vm.8H

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(2*datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if round then 1 << (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, 2*esize];
+    element2 = Elem[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    Elem[result, e, esize] = sum<2*esize-1:esize>;
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Add returning High Narrow. This instruction adds each vector element in the first source SIMD&FP register to the corresponding vector element in the second source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register.

+

A64 Instruction

ADDHN2 Vd.8H,Vn.4S,Vm.4S
+

Argument Preparation

r → Vd.4H 
+a → Vn.4S
+b → Vm.4S

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(2*datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if round then 1 << (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, 2*esize];
+    element2 = Elem[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    Elem[result, e, esize] = sum<2*esize-1:esize>;
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Add returning High Narrow. This instruction adds each vector element in the first source SIMD&FP register to the corresponding vector element in the second source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register.

+

A64 Instruction

ADDHN2 Vd.4S,Vn.2D,Vm.2D
+

Argument Preparation

r → Vd.2S 
+a → Vn.2D
+b → Vm.2D

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(2*datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if round then 1 << (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, 2*esize];
+    element2 = Elem[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    Elem[result, e, esize] = sum<2*esize-1:esize>;
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Rounding Add returning High Narrow. This instruction adds each vector element in the first source SIMD&FP register to the corresponding vector element in the second source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register.

+

A64 Instruction

RADDHN Vd.8B,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(2*datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if round then 1 << (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, 2*esize];
+    element2 = Elem[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    Elem[result, e, esize] = sum<2*esize-1:esize>;
+
+Vpart[d, part] = result;
+
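
The rounding variant only adds a rounding constant of 1 << (esize - 1) before the high half is taken. A minimal per-lane sketch in Rust for 16-bit inputs (helper name invented for the example):

// One RADDHN lane: as ADDHN, plus a rounding constant, still modulo 2^16.
fn raddhn_lane_u16(a: u16, b: u16) -> u8 {
    const ROUND: u16 = 1 << 7; // esize = 8
    (a.wrapping_add(b).wrapping_add(ROUND) >> 8) as u8
}

fn main() {
    // (0x1280 + 0x0000 + 0x80) >> 8 = 0x13: rounds up
    assert_eq!(raddhn_lane_u16(0x1280, 0x0000), 0x13);
    // (0x127F + 0x0000 + 0x80) >> 8 = 0x12: rounds down
    assert_eq!(raddhn_lane_u16(0x127F, 0x0000), 0x12);
}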

Supported architectures

v7/A32/A64

Description

Rounding Add returning High Narrow. This instruction adds each vector element in the first source SIMD&FP register to the corresponding vector element in the second source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register.

+

A64 Instruction

RADDHN Vd.4H,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(2*datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if round then 1 << (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, 2*esize];
+    element2 = Elem[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    Elem[result, e, esize] = sum<2*esize-1:esize>;
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Rounding Add returning High Narrow. This instruction adds each vector element in the first source SIMD&FP register to the corresponding vector element in the second source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register.

+

A64 Instruction

RADDHN Vd.2S,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(2*datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if round then 1 << (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, 2*esize];
+    element2 = Elem[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    Elem[result, e, esize] = sum<2*esize-1:esize>;
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Rounding Add returning High Narrow. This instruction adds each vector element in the first source SIMD&FP register to the corresponding vector element in the second source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register.

+

A64 Instruction

RADDHN Vd.8B,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(2*datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if round then 1 << (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, 2*esize];
+    element2 = Elem[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    Elem[result, e, esize] = sum<2*esize-1:esize>;
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Rounding Add returning High Narrow. This instruction adds each vector element in the first source SIMD&FP register to the corresponding vector element in the second source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register.

+

A64 Instruction

RADDHN Vd.4H,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(2*datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if round then 1 << (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, 2*esize];
+    element2 = Elem[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    Elem[result, e, esize] = sum<2*esize-1:esize>;
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Rounding Add returning High Narrow. This instruction adds each vector element in the first source SIMD&FP register to the corresponding vector element in the second source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register.

+

A64 Instruction

RADDHN Vd.2S,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(2*datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if round then 1 << (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, 2*esize];
+    element2 = Elem[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    Elem[result, e, esize] = sum<2*esize-1:esize>;
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Rounding Add returning High Narrow. This instruction adds each vector element in the first source SIMD&FP register to the corresponding vector element in the second source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register.

+

A64 Instruction

RADDHN2 Vd.16B,Vn.8H,Vm.8H
+

Argument Preparation

r → Vd.8B 
+a → Vn.8H
+b → Vm.8H

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(2*datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if round then 1 << (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, 2*esize];
+    element2 = Elem[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    Elem[result, e, esize] = sum<2*esize-1:esize>;
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Rounding Add returning High Narrow. This instruction adds each vector element in the first source SIMD&FP register to the corresponding vector element in the second source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register.

+

A64 Instruction

RADDHN2 Vd.8H,Vn.4S,Vm.4S
+

Argument Preparation

r → Vd.4H 
+a → Vn.4S
+b → Vm.4S

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(2*datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if round then 1 << (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, 2*esize];
+    element2 = Elem[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    Elem[result, e, esize] = sum<2*esize-1:esize>;
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Rounding Add returning High Narrow. This instruction adds each vector element in the first source SIMD&FP register to the corresponding vector element in the second source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register.

+

A64 Instruction

RADDHN2 Vd.4S,Vn.2D,Vm.2D
+

Argument Preparation

r → Vd.2S 
+a → Vn.2D
+b → Vm.2D

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(2*datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if round then 1 << (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, 2*esize];
+    element2 = Elem[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    Elem[result, e, esize] = sum<2*esize-1:esize>;
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Rounding Add returning High Narrow. This instruction adds each vector element in the first source SIMD&FP register to the corresponding vector element in the second source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register.

+

A64 Instruction

RADDHN2 Vd.16B,Vn.8H,Vm.8H
+

Argument Preparation

r → Vd.8B 
+a → Vn.8H
+b → Vm.8H

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(2*datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if round then 1 << (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, 2*esize];
+    element2 = Elem[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    Elem[result, e, esize] = sum<2*esize-1:esize>;
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Rounding Add returning High Narrow. This instruction adds each vector element in the first source SIMD&FP register to the corresponding vector element in the second source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register.

+

A64 Instruction

RADDHN2 Vd.8H,Vn.4S,Vm.4S
+

Argument Preparation

r → Vd.4H 
+a → Vn.4S
+b → Vm.4S

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(2*datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if round then 1 << (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, 2*esize];
+    element2 = Elem[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    Elem[result, e, esize] = sum<2*esize-1:esize>;
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Rounding Add returning High Narrow. This instruction adds each vector element in the first source SIMD&FP register to the corresponding vector element in the second source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register.

+

A64 Instruction

RADDHN2 Vd.4S,Vn.2D,Vm.2D
+

Argument Preparation

r → Vd.2S 
+a → Vn.2D
+b → Vm.2D

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(2*datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if round then 1 << (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, 2*esize];
+    element2 = Elem[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    Elem[result, e, esize] = sum<2*esize-1:esize>;
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

MUL Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if poly then
+        product = PolynomialMult(element1, element2)<esize-1:0>;
+    else
+        product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    Elem[result, e, esize] = product;
+
+V[d] = result;
+
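
For the non-polynomial path of the pseudocode, the product is simply truncated to the element size, which in Rust is a wrapping multiply. A small illustrative sketch for 8-bit lanes (name invented for the example):

// One MUL lane for 8-bit elements: (UInt(e1) * UInt(e2))<7:0> gives the same
// result as a wrapping multiply, for both signed and unsigned inputs.
fn mul_lane_u8(a: u8, b: u8) -> u8 {
    a.wrapping_mul(b)
}

fn main() {
    assert_eq!(mul_lane_u8(7, 9), 63);
    // 0x40 * 0x04 = 0x100, but only the low 8 bits (0x00) are kept.
    assert_eq!(mul_lane_u8(0x40, 0x04), 0x00);
}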

Supported architectures

v7/A32/A64

Description

Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

MUL Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if poly then
+        product = PolynomialMult(element1, element2)<esize-1:0>;
+    else
+        product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    Elem[result, e, esize] = product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

MUL Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if poly then
+        product = PolynomialMult(element1, element2)<esize-1:0>;
+    else
+        product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    Elem[result, e, esize] = product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

MUL Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if poly then
+        product = PolynomialMult(element1, element2)<esize-1:0>;
+    else
+        product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    Elem[result, e, esize] = product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

MUL Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if poly then
+        product = PolynomialMult(element1, element2)<esize-1:0>;
+    else
+        product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    Elem[result, e, esize] = product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

MUL Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if poly then
+        product = PolynomialMult(element1, element2)<esize-1:0>;
+    else
+        product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    Elem[result, e, esize] = product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

MUL Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if poly then
+        product = PolynomialMult(element1, element2)<esize-1:0>;
+    else
+        product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    Elem[result, e, esize] = product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

MUL Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if poly then
+        product = PolynomialMult(element1, element2)<esize-1:0>;
+    else
+        product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    Elem[result, e, esize] = product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

MUL Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if poly then
+        product = PolynomialMult(element1, element2)<esize-1:0>;
+    else
+        product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    Elem[result, e, esize] = product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

MUL Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if poly then
+        product = PolynomialMult(element1, element2)<esize-1:0>;
+    else
+        product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    Elem[result, e, esize] = product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

MUL Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if poly then
+        product = PolynomialMult(element1, element2)<esize-1:0>;
+    else
+        product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    Elem[result, e, esize] = product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

MUL Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if poly then
+        product = PolynomialMult(element1, element2)<esize-1:0>;
+    else
+        product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    Elem[result, e, esize] = product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Floating-point Multiply (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, places the result in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMUL Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    Elem[result, e, esize] = FPMul(element1, element2, FPCR);
+
+V[d] = result;
+
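
For the floating-point form there is no truncation step; each lane is an ordinary IEEE multiply. A tiny Rust sketch of the 2S arrangement, ignoring the FPCR-controlled rounding-mode and exception details that FPMul takes into account:

// Element-wise multiply modelling FMUL Vd.2S,Vn.2S,Vm.2S.
fn fmul_f32x2(a: [f32; 2], b: [f32; 2]) -> [f32; 2] {
    [a[0] * b[0], a[1] * b[1]]
}

fn main() {
    assert_eq!(fmul_f32x2([1.5, -2.0], [2.0, 4.0]), [3.0, -8.0]);
}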

Supported architectures

v7/A32/A64

Description

Floating-point Multiply (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, places the result in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMUL Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    Elem[result, e, esize] = FPMul(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Polynomial Multiply. This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

PMUL Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if poly then
+        product = PolynomialMult(element1, element2)<esize-1:0>;
+    else
+        product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    Elem[result, e, esize] = product;
+
+V[d] = result;
+
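
The polynomial path replaces ordinary addition with XOR, i.e. a carry-less multiply over GF(2), keeping only the low esize bits. An illustrative per-lane sketch in Rust (helper name invented for the example):

// One PMUL lane: carry-less (polynomial) 8x8-bit multiply, truncated to
// 8 bits like PolynomialMult(element1, element2)<esize-1:0>.
fn pmul_lane_u8(a: u8, b: u8) -> u8 {
    let mut acc: u16 = 0;
    for i in 0..8 {
        if (b >> i) & 1 == 1 {
            acc ^= (a as u16) << i; // XOR instead of a carrying add
        }
    }
    acc as u8
}

fn main() {
    // (x + 1) * (x + 1) over GF(2) = x^2 + 1, i.e. 0x03 * 0x03 = 0x05
    assert_eq!(pmul_lane_u8(0x03, 0x03), 0x05);
}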

Supported architectures

v7/A32/A64

Description

Polynomial Multiply. This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

PMUL Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if poly then
+        product = PolynomialMult(element1, element2)<esize-1:0>;
+    else
+        product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    Elem[result, e, esize] = product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Floating-point Multiply (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, places the result in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMUL Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    Elem[result, e, esize] = FPMul(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Multiply (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, places the result in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMUL Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    Elem[result, e, esize] = FPMul(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Multiply extended (by element). This instruction multiplies the floating-point values in the vector elements in the first source SIMD&FP register by the specified floating-point value in the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMULX Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(idxdsize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2 = Elem[operand2, index, esize];
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    if mulx_op then
+        Elem[result, e, esize] = FPMulX(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMul(element1, element2, FPCR);
+
+V[d] = result;
+
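
What distinguishes FMULX from FMUL is the FPMulX helper: it behaves like an ordinary multiply except that infinity times zero produces 2.0 with the sign of the product instead of the default NaN. A rough Rust model of that rule for f32, glossing over the exact NaN-propagation and FPCR behaviour of the real FPMulX:

// Sketch of the FPMulX special case used by FMULX.
fn fpmulx_f32(a: f32, b: f32) -> f32 {
    let zero_times_inf =
        (a == 0.0 && b.is_infinite()) || (a.is_infinite() && b == 0.0);
    if zero_times_inf {
        // Result is 2.0 with the sign the product would have had.
        if a.is_sign_negative() ^ b.is_sign_negative() { -2.0 } else { 2.0 }
    } else {
        a * b
    }
}

fn main() {
    assert_eq!(fpmulx_f32(0.0, f32::INFINITY), 2.0);
    assert_eq!(fpmulx_f32(-0.0, f32::INFINITY), -2.0);
    assert_eq!(fpmulx_f32(3.0, 2.0), 6.0);
}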

Supported architectures

A64

Description

Floating-point Multiply extended (by element). This instruction multiplies the floating-point values in the vector elements in the first source SIMD&FP register by the specified floating-point value in the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMULX Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(idxdsize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2 = Elem[operand2, index, esize];
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    if mulx_op then
+        Elem[result, e, esize] = FPMulX(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMul(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Multiply extended (by element). This instruction multiplies the floating-point values in the vector elements in the first source SIMD&FP register by the specified floating-point value in the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMULX Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(idxdsize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2 = Elem[operand2, index, esize];
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    if mulx_op then
+        Elem[result, e, esize] = FPMulX(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMul(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Multiply extended (by element). This instruction multiplies the floating-point values in the vector elements in the first source SIMD&FP register by the specified floating-point value in the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMULX Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(idxdsize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2 = Elem[operand2, index, esize];
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    if mulx_op then
+        Elem[result, e, esize] = FPMulX(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMul(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Multiply extended (by element). This instruction multiplies the floating-point values in the vector elements in the first source SIMD&FP register by the specified floating-point value in the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMULX Sd,Sn,Sm
+

Argument Preparation

a → Sn 
+b → Sm

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(idxdsize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2 = Elem[operand2, index, esize];
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    if mulx_op then
+        Elem[result, e, esize] = FPMulX(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMul(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Multiply extended (by element). This instruction multiplies the floating-point values in the vector elements in the first source SIMD&FP register by the specified floating-point value in the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMULX Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(idxdsize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2 = Elem[operand2, index, esize];
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    if mulx_op then
+        Elem[result, e, esize] = FPMulX(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMul(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Multiply extended (by element). This instruction multiplies the floating-point values in the vector elements in the first source SIMD&FP register by the specified floating-point value in the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMULX Vd.2S,Vn.2S,Vm.S[lane]
+

Argument Preparation

a → Vn.2S 
+v → Vm.2S
+0 <= lane <= 1

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(idxdsize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2 = Elem[operand2, index, esize];
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    if mulx_op then
+        Elem[result, e, esize] = FPMulX(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMul(element1, element2, FPCR);
+
+V[d] = result;
+
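
In the by-element form a single lane of the second register is selected once (Elem[operand2, index, esize]) and multiplied against every lane of the first. A minimal Rust sketch of that indexing for the 2S arrangement; plain * stands in for the FPMulX rule sketched earlier, and the names are invented for the example:

// Multiply every lane of `a` by the chosen lane of `v`.
fn mulx_by_lane_f32x2(a: [f32; 2], v: [f32; 2], lane: usize) -> [f32; 2] {
    assert!(lane < 2); // 0 <= lane <= 1 for the .2S form
    let e2 = v[lane];  // Elem[operand2, index, esize]
    [a[0] * e2, a[1] * e2]
}

fn main() {
    assert_eq!(mulx_by_lane_f32x2([1.0, 2.0], [10.0, 20.0], 1), [20.0, 40.0]);
}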

Supported architectures

A64

Description

Floating-point Multiply extended (by element). This instruction multiplies the floating-point values in the vector elements in the first source SIMD&FP register by the specified floating-point value in the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMULX Vd.4S,Vn.4S,Vm.S[lane]
+

Argument Preparation

a → Vn.4S 
+v → Vm.2S
+0 <= lane <= 1

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(idxdsize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2 = Elem[operand2, index, esize];
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    if mulx_op then
+        Elem[result, e, esize] = FPMulX(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMul(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Multiply extended (by element). This instruction multiplies the floating-point values in the vector elements in the first source SIMD&FP register by the specified floating-point value in the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMULX Dd,Dn,Vm.D[lane]
+

Argument Preparation

a → Dn 
+v → Vm.1D
+0 <= lane <= 0

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(idxdsize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2 = Elem[operand2, index, esize];
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    if mulx_op then
+        Elem[result, e, esize] = FPMulX(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMul(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Multiply extended (by element). This instruction multiplies the floating-point values in the vector elements in the first source SIMD&FP register by the specified floating-point value in the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMULX Vd.2D,Vn.2D,Vm.D[lane]
+

Argument Preparation

a → Vn.2D 
+v → Vm.1D
+0 <= lane <= 0

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(idxdsize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2 = Elem[operand2, index, esize];
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    if mulx_op then
+        Elem[result, e, esize] = FPMulX(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMul(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Multiply extended (by element). This instruction multiplies the floating-point values in the vector elements in the first source SIMD&FP register by the specified floating-point value in the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMULX Sd,Sn,Vm.S[lane]
+

Argument Preparation

a → Sn 
+v → Vm.2S
+0 <= lane <= 1

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(idxdsize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2 = Elem[operand2, index, esize];
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    if mulx_op then
+        Elem[result, e, esize] = FPMulX(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMul(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Multiply extended (by element). This instruction multiplies the floating-point values in the vector elements in the first source SIMD&FP register by the specified floating-point value in the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMULX Dd,Dn,Vm.D[lane]
+

Argument Preparation

a → Dn 
+v → Vm.1D
+0 <= lane <= 0

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(idxdsize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2 = Elem[operand2, index, esize];
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    if mulx_op then
+        Elem[result, e, esize] = FPMulX(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMul(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Multiply extended (by element). This instruction multiplies the floating-point values in the vector elements in the first source SIMD&FP register by the specified floating-point value in the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMULX Vd.2S,Vn.2S,Vm.S[lane]
+

Argument Preparation

a → Vn.2S 
+v → Vm.4S
+0 <= lane <= 3

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(idxdsize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2 = Elem[operand2, index, esize];
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    if mulx_op then
+        Elem[result, e, esize] = FPMulX(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMul(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Multiply extended (by element). This instruction multiplies the floating-point values in the vector elements in the first source SIMD&FP register by the specified floating-point value in the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMULX Vd.4S,Vn.4S,Vm.S[lane]
+

Argument Preparation

a → Vn.4S 
+v → Vm.4S
+0 <= lane <= 3

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(idxdsize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2 = Elem[operand2, index, esize];
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    if mulx_op then
+        Elem[result, e, esize] = FPMulX(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMul(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Multiply extended (by element). This instruction multiplies the floating-point values in the vector elements in the first source SIMD&FP register by the specified floating-point value in the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMULX Dd,Dn,Vm.D[lane]
+

Argument Preparation

a → Dn 
+v → Vm.2D
+0 <= lane <= 1

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(idxdsize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2 = Elem[operand2, index, esize];
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    if mulx_op then
+        Elem[result, e, esize] = FPMulX(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMul(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Multiply extended (by element). This instruction multiplies the floating-point values in the vector elements in the first source SIMD&FP register by the specified floating-point value in the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMULX Vd.2D,Vn.2D,Vm.D[lane]
+

Argument Preparation

a → Vn.2D 
+v → Vm.2D
+0 <= lane <= 1

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(idxdsize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2 = Elem[operand2, index, esize];
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    if mulx_op then
+        Elem[result, e, esize] = FPMulX(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMul(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Multiply extended (by element). This instruction multiplies the floating-point values in the vector elements in the first source SIMD&FP register by the specified floating-point value in the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMULX Sd,Sn,Vm.S[lane]
+

Argument Preparation

a → Sn 
+v → Vm.4S
+0 <= lane <= 3

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(idxdsize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2 = Elem[operand2, index, esize];
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    if mulx_op then
+        Elem[result, e, esize] = FPMulX(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMul(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Multiply extended (by element). This instruction multiplies the floating-point values in the vector elements in the first source SIMD&FP register by the specified floating-point value in the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMULX Dd,Dn,Vm.D[lane]
+

Argument Preparation

a → Dn 
+v → Vm.2D
+0 <= lane <= 1

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(idxdsize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2 = Elem[operand2, index, esize];
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    if mulx_op then
+        Elem[result, e, esize] = FPMulX(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMul(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64
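
For the FMULX forms listed above, a minimal Rust sketch follows. It uses the plain two-operand vector form rather than a by-element form, and it assumes that core::arch::aarch64 exposes the vmulx* family (vmulxq_f64 here; the _lane/_laneq by-element forms are analogous) — an illustrative assumption, not something stated in this patch. FMULX differs from an ordinary multiply only in that zero times infinity returns 2.0 with the appropriate sign instead of NaN.

// Minimal sketch (not the crate's implementation) of FMULX on two f64 lanes.
// Assumes vmulxq_f64 is available; NEON is mandatory on AArch64 targets.
#[cfg(target_arch = "aarch64")]
fn fmulx_demo() {
    use core::arch::aarch64::*;
    unsafe {
        let a = vdupq_n_f64(0.0);
        let b = vdupq_n_f64(f64::INFINITY);
        let r = vmulxq_f64(a, b); // FPMulX: 0.0 * inf yields +2.0, not NaN
        let mut out = [0.0f64; 2];
        vst1q_f64(out.as_mut_ptr(), r);
        assert_eq!(out, [2.0, 2.0]);
    }
}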

Description

Floating-point Divide (vector). This instruction divides the floating-point values in the elements in the first source SIMD&FP register, by the floating-point values in the corresponding elements in the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FDIV Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    Elem[result, e, esize] = FPDiv(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Divide (vector). This instruction divides the floating-point values in the elements in the first source SIMD&FP register, by the floating-point values in the corresponding elements in the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FDIV Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    Elem[result, e, esize] = FPDiv(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Divide (vector). This instruction divides the floating-point values in the elements in the first source SIMD&FP register, by the floating-point values in the corresponding elements in the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FDIV Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    Elem[result, e, esize] = FPDiv(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Divide (vector). This instruction divides the floating-point values in the elements in the first source SIMD&FP register, by the floating-point values in the corresponding elements in the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FDIV Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    Elem[result, e, esize] = FPDiv(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64
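
The FDIV entries above divide lane-wise. A minimal sketch, assuming core::arch::aarch64 provides vdivq_f32 (an illustrative assumption):

// Minimal sketch: element-wise f32 division, as in FDIV Vd.4S,Vn.4S,Vm.4S.
#[cfg(target_arch = "aarch64")]
fn fdiv_demo() {
    use core::arch::aarch64::*;
    unsafe {
        let a = vdupq_n_f32(6.0);
        let b = vdupq_n_f32(3.0);
        let r = vdivq_f32(a, b); // each lane: 6.0 / 3.0 = 2.0
        let mut out = [0.0f32; 4];
        vst1q_f32(out.as_mut_ptr(), r);
        assert_eq!(out, [2.0; 4]);
    }
}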

Description

Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLA Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vd.8B 
+b → Vn.8B
+c → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLA Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vd.16B 
+b → Vn.16B
+c → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLA Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vd.4H 
+b → Vn.4H
+c → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLA Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vd.8H 
+b → Vn.8H
+c → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLA Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vd.2S 
+b → Vn.2S
+c → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLA Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vd.4S 
+b → Vn.4S
+c → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLA Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vd.8B 
+b → Vn.8B
+c → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLA Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vd.16B 
+b → Vn.16B
+c → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLA Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vd.4H 
+b → Vn.4H
+c → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLA Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vd.8H 
+b → Vn.8H
+c → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLA Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vd.2S 
+b → Vn.2S
+c → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLA Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vd.4S 
+b → Vn.4S
+c → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
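
The integer MLA forms above compute a[i] + b[i]*c[i] per lane, wrapping to the element size. A minimal sketch, assuming core::arch::aarch64 exposes vmlaq_u8 (an illustrative name, not taken from this patch):

// Minimal sketch: MLA Vd.16B,Vn.16B,Vm.16B — per-lane multiply-accumulate.
#[cfg(target_arch = "aarch64")]
fn mla_demo() {
    use core::arch::aarch64::*;
    unsafe {
        let acc = vdupq_n_u8(10);
        let b = vdupq_n_u8(3);
        let c = vdupq_n_u8(4);
        let r = vmlaq_u8(acc, b, c); // 10 + 3*4 = 22 in every lane
        let mut out = [0u8; 16];
        vst1q_u8(out.as_mut_ptr(), r);
        assert_eq!(out, [22u8; 16]);
    }
}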

Description

A64 Instruction

RESULT[i] = a[i] + (b[i] * c[i]) for i = 0 to 1
+

Argument Preparation

a → N/A 
+b → N/A
+c → N/A

Results

N/A → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

RESULT[i] = a[i] + (b[i] * c[i]) for i = 0 to 3
+

Argument Preparation

a → N/A 
+b → N/A
+c → N/A

Results

N/A → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

RESULT[i] = a[i] + (b[i] * c[i]) for i = 0
+

Argument Preparation

a → N/A 
+b → N/A
+c → N/A

Results

N/A → result
+

Supported architectures

A64

Description

A64 Instruction

RESULT[i] = a[i] + (b[i] * c[i]) for i = 0 to 1
+

Argument Preparation

a → N/A 
+b → N/A
+c → N/A

Results

N/A → result
+

Supported architectures

A64
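
The formula-only entries above describe the same a[i] + b[i]*c[i] pattern without a single dedicated instruction, most likely for the floating-point lane widths. A minimal sketch, assuming vmlaq_f32 is available in core::arch::aarch64 (an illustrative assumption):

// Minimal sketch: per-lane a[i] + b[i]*c[i] on f32.
#[cfg(target_arch = "aarch64")]
fn mla_f32_demo() {
    use core::arch::aarch64::*;
    unsafe {
        let a = vdupq_n_f32(1.0);
        let b = vdupq_n_f32(2.0);
        let c = vdupq_n_f32(3.0);
        let r = vmlaq_f32(a, b, c); // 1.0 + 2.0*3.0 = 7.0 in every lane
        let mut out = [0.0f32; 4];
        vst1q_f32(out.as_mut_ptr(), r);
        assert_eq!(out, [7.0; 4]);
    }
}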

Description

Signed Multiply-Add Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SMLAL Vd.8H,Vn.8B,Vm.8B
+

Argument Preparation

a → Vd.8H 
+b → Vn.8B
+c → Vm.8B

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Multiply-Add Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SMLAL Vd.4S,Vn.4H,Vm.4H
+

Argument Preparation

a → Vd.4S 
+b → Vn.4H
+c → Vm.4H

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Multiply-Add Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SMLAL Vd.2D,Vn.2S,Vm.2S
+

Argument Preparation

a → Vd.2D 
+b → Vn.2S
+c → Vm.2S

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
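
SMLAL widens: the products and the accumulator lanes are twice the width of the multiplied elements. A minimal sketch, assuming vmlal_s16 is exposed by core::arch::aarch64 (an illustrative name):

// Minimal sketch: SMLAL Vd.4S,Vn.4H,Vm.4H — signed widening multiply-accumulate.
#[cfg(target_arch = "aarch64")]
fn smlal_demo() {
    use core::arch::aarch64::*;
    unsafe {
        let acc = vdupq_n_s32(100);
        let b = vdup_n_s16(-300);
        let c = vdup_n_s16(300);
        // -300 * 300 = -90_000 does not fit in i16, but the i32 accumulator holds it.
        let r = vmlal_s16(acc, b, c); // 100 + (-90_000) = -89_900 per lane
        let mut out = [0i32; 4];
        vst1q_s32(out.as_mut_ptr(), r);
        assert_eq!(out, [-89_900; 4]);
    }
}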

Description

Unsigned Multiply-Add Long (vector). This instruction multiplies the vector elements in the lower or upper half of the first source SIMD&FP register by the corresponding vector elements of the second source SIMD&FP register, and accumulates the results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

UMLAL Vd.8H,Vn.8B,Vm.8B
+

Argument Preparation

a → Vd.8H 
+b → Vn.8B
+c → Vm.8B

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Multiply-Add Long (vector). This instruction multiplies the vector elements in the lower or upper half of the first source SIMD&FP register by the corresponding vector elements of the second source SIMD&FP register, and accumulates the results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

UMLAL Vd.4S,Vn.4H,Vm.4H
+

Argument Preparation

a → Vd.4S 
+b → Vn.4H
+c → Vm.4H

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Multiply-Add Long (vector). This instruction multiplies the vector elements in the lower or upper half of the first source SIMD&FP register by the corresponding vector elements of the second source SIMD&FP register, and accumulates the results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

UMLAL Vd.2D,Vn.2S,Vm.2S
+

Argument Preparation

a → Vd.2D 
+b → Vn.2S
+c → Vm.2S

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
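
UMLAL is the unsigned counterpart: widening multiply, then accumulate. A minimal sketch, assuming vmlal_u16 is available (an illustrative name):

// Minimal sketch: UMLAL Vd.4S,Vn.4H,Vm.4H — unsigned widening multiply-accumulate.
#[cfg(target_arch = "aarch64")]
fn umlal_demo() {
    use core::arch::aarch64::*;
    unsafe {
        let acc = vdupq_n_u32(1);
        let b = vdup_n_u16(60_000);
        let c = vdup_n_u16(2);
        let r = vmlal_u16(acc, b, c); // 1 + 60_000*2 = 120_001, wider than u16
        let mut out = [0u32; 4];
        vst1q_u32(out.as_mut_ptr(), r);
        assert_eq!(out, [120_001; 4]);
    }
}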

Description

Signed Multiply-Add Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SMLAL2 Vd.8H,Vn.16B,Vm.16B
+

Argument Preparation

a → Vd.8H 
+b → Vn.16B
+c → Vm.16B

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Multiply-Add Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SMLAL2 Vd.4S,Vn.8H,Vm.8H
+

Argument Preparation

a → Vd.4S 
+b → Vn.8H
+c → Vm.8H

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Multiply-Add Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SMLAL2 Vd.2D,Vn.4S,Vm.4S
+

Argument Preparation

a → Vd.2D 
+b → Vn.4S
+c → Vm.4S

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

A64
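
The SMLAL2 forms read the upper halves of the source vectors instead of the lower halves. A minimal sketch, assuming vmlal_high_s16 exists in core::arch::aarch64 (an illustrative assumption):

// Minimal sketch: SMLAL2 Vd.4S,Vn.8H,Vm.8H — widening accumulate over the upper lanes.
#[cfg(target_arch = "aarch64")]
fn smlal2_demo() {
    use core::arch::aarch64::*;
    unsafe {
        let acc = vdupq_n_s32(0);
        let b = vdupq_n_s16(7);
        let c = vdupq_n_s16(9);
        let r = vmlal_high_s16(acc, b, c); // only lanes 4..7 of b and c are multiplied
        let mut out = [0i32; 4];
        vst1q_s32(out.as_mut_ptr(), r);
        assert_eq!(out, [63; 4]); // 0 + 7*9 in every accumulator lane
    }
}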

Description

Unsigned Multiply-Add Long (vector). This instruction multiplies the vector elements in the lower or upper half of the first source SIMD&FP register by the corresponding vector elements of the second source SIMD&FP register, and accumulates the results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

UMLAL2 Vd.8H,Vn.16B,Vm.16B
+

Argument Preparation

a → Vd.8H 
+b → Vn.16B
+c → Vm.16B

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Multiply-Add Long (vector). This instruction multiplies the vector elements in the lower or upper half of the first source SIMD&FP register by the corresponding vector elements of the second source SIMD&FP register, and accumulates the results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

UMLAL2 Vd.4S,Vn.8H,Vm.8H
+

Argument Preparation

a → Vd.4S 
+b → Vn.8H
+c → Vm.8H

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Multiply-Add Long (vector). This instruction multiplies the vector elements in the lower or upper half of the first source SIMD&FP register by the corresponding vector elements of the second source SIMD&FP register, and accumulates the results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

UMLAL2 Vd.2D,Vn.4S,Vm.4S
+

Argument Preparation

a → Vd.2D 
+b → Vn.4S
+c → Vm.4S

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

A64
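
UMLAL2 does the same over the upper halves with unsigned arithmetic. A minimal sketch, assuming vmlal_high_u16 is available (an illustrative name):

// Minimal sketch: UMLAL2 Vd.4S,Vn.8H,Vm.8H — unsigned widening accumulate, upper lanes.
#[cfg(target_arch = "aarch64")]
fn umlal2_demo() {
    use core::arch::aarch64::*;
    unsafe {
        let acc = vdupq_n_u32(5);
        let b = vdupq_n_u16(1_000);
        let c = vdupq_n_u16(1_000);
        let r = vmlal_high_u16(acc, b, c); // 5 + 1_000_000 per lane
        let mut out = [0u32; 4];
        vst1q_u32(out.as_mut_ptr(), r);
        assert_eq!(out, [1_000_005; 4]);
    }
}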

Description

Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLS Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vd.8B 
+b → Vn.8B
+c → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLS Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vd.16B 
+b → Vn.16B
+c → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLS Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vd.4H 
+b → Vn.4H
+c → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLS Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vd.8H 
+b → Vn.8H
+c → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLS Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vd.2S 
+b → Vn.2S
+c → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLS Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vd.4S 
+b → Vn.4S
+c → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLS Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vd.8B 
+b → Vn.8B
+c → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLS Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vd.16B 
+b → Vn.16B
+c → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLS Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vd.4H 
+b → Vn.4H
+c → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLS Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vd.8H 
+b → Vn.8H
+c → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLS Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vd.2S 
+b → Vn.2S
+c → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLS Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vd.4S 
+b → Vn.4S
+c → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
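
MLS mirrors MLA with a subtraction: each lane computes a[i] - b[i]*c[i], wrapping to the element size. A minimal sketch, assuming vmlsq_u8 is exposed (an illustrative name):

// Minimal sketch: MLS Vd.16B,Vn.16B,Vm.16B — per-lane multiply-subtract.
#[cfg(target_arch = "aarch64")]
fn mls_demo() {
    use core::arch::aarch64::*;
    unsafe {
        let acc = vdupq_n_u8(50);
        let b = vdupq_n_u8(3);
        let c = vdupq_n_u8(4);
        let r = vmlsq_u8(acc, b, c); // 50 - 3*4 = 38 in every lane
        let mut out = [0u8; 16];
        vst1q_u8(out.as_mut_ptr(), r);
        assert_eq!(out, [38u8; 16]);
    }
}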

Description

A64 Instruction

RESULT[i] = a[i] - (b[i] * c[i]) for i = 0 to 1
+

Argument Preparation

a → N/A 
+b → N/A
+c → N/A

Results

N/A → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

RESULT[i] = a[i] - (b[i] * c[i]) for i = 0 to 3
+

Argument Preparation

a → N/A 
+b → N/A
+c → N/A

Results

N/A → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

RESULT[i] = a[i] - (b[i] * c[i]) for i = 0
+

Argument Preparation

a → N/A 
+b → N/A
+c → N/A

Results

N/A → result
+

Supported architectures

A64

Description

A64 Instruction

RESULT[i] = a[i] - (b[i] * c[i]) for i = 0 to 1
+

Argument Preparation

a → N/A 
+b → N/A
+c → N/A

Results

N/A → result
+

Supported architectures

A64
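
As with the additive case, these formula-only entries most likely cover the floating-point multiply-subtract variants. A minimal sketch, assuming vmlsq_f32 is available in core::arch::aarch64 (an illustrative assumption):

// Minimal sketch: per-lane a[i] - b[i]*c[i] on f32.
#[cfg(target_arch = "aarch64")]
fn mls_f32_demo() {
    use core::arch::aarch64::*;
    unsafe {
        let a = vdupq_n_f32(10.0);
        let b = vdupq_n_f32(2.0);
        let c = vdupq_n_f32(3.0);
        let r = vmlsq_f32(a, b, c); // 10.0 - 2.0*3.0 = 4.0 in every lane
        let mut out = [0.0f32; 4];
        vst1q_f32(out.as_mut_ptr(), r);
        assert_eq!(out, [4.0; 4]);
    }
}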

Description

Signed Multiply-Subtract Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SMLSL Vd.8H,Vn.8B,Vm.8B
+

Argument Preparation

a → Vd.8H 
+b → Vn.8B
+c → Vm.8B

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Multiply-Subtract Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SMLSL Vd.4S,Vn.4H,Vm.4H
+

Argument Preparation

a → Vd.4S 
+b → Vn.4H
+c → Vm.4H

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Multiply-Subtract Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SMLSL Vd.2D,Vn.2S,Vm.2S
+

Argument Preparation

a → Vd.2D 
+b → Vn.2S
+c → Vm.2S

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
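
SMLSL is the widening multiply-subtract: signed products twice the element width, subtracted from the accumulator. A minimal sketch, assuming vmlsl_s16 is available (an illustrative name):

// Minimal sketch: SMLSL Vd.4S,Vn.4H,Vm.4H — signed widening multiply-subtract.
#[cfg(target_arch = "aarch64")]
fn smlsl_demo() {
    use core::arch::aarch64::*;
    unsafe {
        let acc = vdupq_n_s32(1_000_000);
        let b = vdup_n_s16(500);
        let c = vdup_n_s16(400);
        let r = vmlsl_s16(acc, b, c); // 1_000_000 - 500*400 = 800_000 per lane
        let mut out = [0i32; 4];
        vst1q_s32(out.as_mut_ptr(), r);
        assert_eq!(out, [800_000; 4]);
    }
}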

Description

Unsigned Multiply-Subtract Long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UMLSL Vd.8H,Vn.8B,Vm.8B
+

Argument Preparation

a → Vd.8H 
+b → Vn.8B
+c → Vm.8B

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Multiply-Subtract Long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UMLSL Vd.4S,Vn.4H,Vm.4H
+

Argument Preparation

a → Vd.4S 
+b → Vn.4H
+c → Vm.4H

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Multiply-Subtract Long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UMLSL Vd.2D,Vn.2S,Vm.2S
+

Argument Preparation

a → Vd.2D 
+b → Vn.2S
+c → Vm.2S

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Multiply-Subtract Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SMLSL2 Vd.8H,Vn.16B,Vm.16B
+

Argument Preparation

a → Vd.8H 
+b → Vn.16B
+c → Vm.16B

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

A64
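
A hedged scalar sketch of the SMLSL2 form in Rust (names are illustrative): the "2" suffix means the upper eight elements of the 16-byte sources are read, and the widening products are signed:

// Scalar sketch of SMLSL2 Vd.8H,Vn.16B,Vm.16B: upper half, signed widening.
fn smlsl2_i8(acc: &mut [i16; 8], b: [i8; 16], c: [i8; 16]) {
    for e in 0..8 {
        let product = i16::from(b[e + 8]) * i16::from(c[e + 8]);
        acc[e] = acc[e].wrapping_sub(product);
    }
}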

Description

Signed Multiply-Subtract Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SMLSL2 Vd.4S,Vn.8H,Vm.8H
+

Argument Preparation

a → Vd.4S 
+b → Vn.8H
+c → Vm.8H

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Multiply-Subtract Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SMLSL2 Vd.2D,Vn.4S,Vm.4S
+

Argument Preparation

a → Vd.2D 
+b → Vn.4S
+c → Vm.4S

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Multiply-Subtract Long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UMLSL2 Vd.8H,Vn.16B,Vm.16B
+

Argument Preparation

a → Vd.8H 
+b → Vn.16B
+c → Vm.16B

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

A64
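
For comparison, a minimal scalar sketch of the UMLSL2 form (illustrative Rust): identical to UMLSL except that the upper eight bytes of each source are used:

// Scalar sketch of UMLSL2 Vd.8H,Vn.16B,Vm.16B: upper half, unsigned widening.
fn umlsl2_u8(acc: &mut [u16; 8], b: [u8; 16], c: [u8; 16]) {
    for e in 0..8 {
        acc[e] = acc[e].wrapping_sub(u16::from(b[e + 8]) * u16::from(c[e + 8]));
    }
}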

Description

Unsigned Multiply-Subtract Long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UMLSL2 Vd.4S,Vn.8H,Vm.8H
+

Argument Preparation

a → Vd.4S 
+b → Vn.8H
+c → Vm.8H

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Multiply-Subtract Long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UMLSL2 Vd.2D,Vn.4S,Vm.4S
+

Argument Preparation

a → Vd.2D 
+b → Vn.4S
+c → Vm.4S

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point fused Multiply-Add to accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, adds the product to the corresponding vector element of the destination SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

FMLA Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vd.2S 
+b → Vn.2S
+c → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then element1 = FPNeg(element1);
+    Elem[result, e, esize] = FPMulAdd(Elem[operand3, e, esize], element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
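
The FPMulAdd step above is a single fused multiply-add per lane. A minimal scalar sketch in Rust (illustrative only; lane count fixed to the .2S form), using f32::mul_add as the fused operation:

// Scalar sketch of FMLA Vd.2S,Vn.2S,Vm.2S: d[e] = d[e] + n[e]*m[e] with one rounding.
fn fmla_f32(d: &mut [f32; 2], n: [f32; 2], m: [f32; 2]) {
    for e in 0..2 {
        d[e] = n[e].mul_add(m[e], d[e]);
    }
}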

Description

Floating-point fused Multiply-Add to accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, adds the product to the corresponding vector element of the destination SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

FMLA Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vd.4S 
+b → Vn.4S
+c → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then element1 = FPNeg(element1);
+    Elem[result, e, esize] = FPMulAdd(Elem[operand3, e, esize], element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Floating-point fused Multiply-Add (scalar). This instruction multiplies the values of the first two SIMD&FP source registers, adds the product to the value of the third SIMD&FP source register, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FMADD Dd,Dn,Dm,Da
+

Argument Preparation

a → Da 
+b → Dn
+c → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) result;
+bits(datasize) operanda = V[a];
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+
+result = FPMulAdd(operanda, operand1, operand2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64
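
As a scalar reference (illustrative Rust, not crate code), FMADD is one fused multiply-add on 64-bit floats:

// Scalar sketch of FMADD Dd,Dn,Dm,Da: Dd = Da + Dn*Dm with a single rounding.
fn fmadd_f64(n: f64, m: f64, a: f64) -> f64 {
    n.mul_add(m, a)
}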

Description

Floating-point fused Multiply-Add to accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, adds the product to the corresponding vector element of the destination SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

FMLA Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vd.2D 
+b → Vn.2D
+c → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then element1 = FPNeg(element1);
+    Elem[result, e, esize] = FPMulAdd(Elem[operand3, e, esize], element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point fused Multiply-Add to accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, adds the product to the corresponding vector element of the destination SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

FMLA Vd.2S,Vn.2S,Vm.S[lane]
+

Argument Preparation

a → Vd.2S 
+b → Vn.2S
+v → Vm.2S
+0 <= lane <= 1

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then element1 = FPNeg(element1);
+    Elem[result, e, esize] = FPMulAdd(Elem[operand3, e, esize], element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64
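
A hedged scalar sketch of the by-lane form (illustrative Rust): one element of the v operand, selected by lane, is multiplied into every lane of n before the fused accumulate:

// Scalar sketch of FMLA Vd.2S,Vn.2S,Vm.S[lane]: broadcast v[lane], then fuse.
fn fmla_lane_f32(d: &mut [f32; 2], n: [f32; 2], v: [f32; 2], lane: usize) {
    assert!(lane < 2); // 0 <= lane <= 1 for the .2S vector
    for e in 0..2 {
        d[e] = n[e].mul_add(v[lane], d[e]);
    }
}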

Description

Floating-point fused Multiply-Add to accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, adds the product to the corresponding vector element of the destination SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

FMLA Vd.4S,Vn.4S,Vm.S[lane]
+

Argument Preparation

a → Vd.4S 
+b → Vn.4S
+v → Vm.2S
+0 <= lane <= 1

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then element1 = FPNeg(element1);
+    Elem[result, e, esize] = FPMulAdd(Elem[operand3, e, esize], element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point fused Multiply-Add to accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, adds the product to the corresponding vector element of the destination SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

FMLA Dd,Dn,Vm.D[lane]
+

Argument Preparation

a → Dd 
+b → Dn
+v → Vm.1D
+0 <= lane <= 0

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then element1 = FPNeg(element1);
+    Elem[result, e, esize] = FPMulAdd(Elem[operand3, e, esize], element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point fused Multiply-Add to accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, adds the product to the corresponding vector element of the destination SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

FMLA Vd.2D,Vn.2D,Vm.D[lane]
+

Argument Preparation

a → Vd.2D 
+b → Vn.2D
+v → Vm.1D
+0 <= lane <= 0

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then element1 = FPNeg(element1);
+    Elem[result, e, esize] = FPMulAdd(Elem[operand3, e, esize], element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point fused Multiply-Add to accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, adds the product to the corresponding vector element of the destination SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

FMLA Sd,Sn,Vm.S[lane]
+

Argument Preparation

a → Sd 
+b → Sn
+v → Vm.2S
+0 <= lane <= 1

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then element1 = FPNeg(element1);
+    Elem[result, e, esize] = FPMulAdd(Elem[operand3, e, esize], element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point fused Multiply-Add to accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, adds the product to the corresponding vector element of the destination SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

FMLA Dd,Dn,Vm.D[lane]
+

Argument Preparation

a → Dd 
+b → Dn
+v → Vm.1D
+0 <= lane <= 0

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then element1 = FPNeg(element1);
+    Elem[result, e, esize] = FPMulAdd(Elem[operand3, e, esize], element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point fused Multiply-Add to accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, adds the product to the corresponding vector element of the destination SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

FMLA Vd.2S,Vn.2S,Vm.S[lane]
+

Argument Preparation

a → Vd.2S 
+b → Vn.2S
+v → Vm.4S
+0 <= lane <= 3

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then element1 = FPNeg(element1);
+    Elem[result, e, esize] = FPMulAdd(Elem[operand3, e, esize], element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point fused Multiply-Add to accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, adds the product to the corresponding vector element of the destination SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

FMLA Vd.4S,Vn.4S,Vm.S[lane]
+

Argument Preparation

a → Vd.4S 
+b → Vn.4S
+v → Vm.4S
+0 <= lane <= 3

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then element1 = FPNeg(element1);
+    Elem[result, e, esize] = FPMulAdd(Elem[operand3, e, esize], element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point fused Multiply-Add to accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, adds the product to the corresponding vector element of the destination SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

FMLA Dd,Dn,Vm.D[lane]
+

Argument Preparation

a → Dd 
+b → Dn
+v → Vm.2D
+0 <= lane <= 1

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then element1 = FPNeg(element1);
+    Elem[result, e, esize] = FPMulAdd(Elem[operand3, e, esize], element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point fused Multiply-Add to accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, adds the product to the corresponding vector element of the destination SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

FMLA Vd.2D,Vn.2D,Vm.D[lane]
+

Argument Preparation

a → Vd.2D 
+b → Vn.2D
+v → Vm.2D
+0 <= lane <= 1

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then element1 = FPNeg(element1);
+    Elem[result, e, esize] = FPMulAdd(Elem[operand3, e, esize], element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point fused Multiply-Add to accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, adds the product to the corresponding vector element of the destination SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

FMLA Sd,Sn,Vm.S[lane]
+

Argument Preparation

a → Sd 
+b → Sn
+v → Vm.4S
+0 <= lane <= 3

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then element1 = FPNeg(element1);
+    Elem[result, e, esize] = FPMulAdd(Elem[operand3, e, esize], element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point fused Multiply-Add to accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, adds the product to the corresponding vector element of the destination SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

FMLA Dd,Dn,Vm.D[lane]
+

Argument Preparation

a → Dd 
+b → Dn
+v → Vm.2D
+0 <= lane <= 1

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then element1 = FPNeg(element1);
+    Elem[result, e, esize] = FPMulAdd(Elem[operand3, e, esize], element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point fused Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, negates the product, adds the result to the corresponding vector element of the destination SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

FMLS Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vd.2S 
+b → Vn.2S
+c → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then element1 = FPNeg(element1);
+    Elem[result, e, esize] = FPMulAdd(Elem[operand3, e, esize], element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
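
A minimal scalar sketch of FMLS in Rust (illustrative only): the first factor is negated before the fused accumulate, so each lane computes d - n*m with one rounding:

// Scalar sketch of FMLS Vd.2S,Vn.2S,Vm.2S: d[e] = d[e] + (-n[e])*m[e].
fn fmls_f32(d: &mut [f32; 2], n: [f32; 2], m: [f32; 2]) {
    for e in 0..2 {
        d[e] = (-n[e]).mul_add(m[e], d[e]);
    }
}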

Description

Floating-point fused Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, negates the product, adds the result to the corresponding vector element of the destination SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

FMLS Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vd.4S 
+b → Vn.4S
+c → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then element1 = FPNeg(element1);
+    Elem[result, e, esize] = FPMulAdd(Elem[operand3, e, esize], element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Floating-point Fused Multiply-Subtract (scalar). This instruction multiplies the values of the first two SIMD&FP source registers, negates the product, adds that to the value of the third SIMD&FP source register, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FMSUB Dd,Dn,Dm,Da
+

Argument Preparation

a → Da 
+b → Dn
+c → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) result;
+bits(datasize) operanda = V[a];
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+
+operand1 = FPNeg(operand1);
+result = FPMulAdd(operanda, operand1, operand2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64
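
Scalar reference for FMSUB (illustrative Rust): the negation happens before the fused multiply-add, so the product is never rounded on its own:

// Scalar sketch of FMSUB Dd,Dn,Dm,Da: Dd = Da - Dn*Dm, fused.
fn fmsub_f64(n: f64, m: f64, a: f64) -> f64 {
    (-n).mul_add(m, a)
}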

Description

Floating-point fused Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, negates the product, adds the result to the corresponding vector element of the destination SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

FMLS Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vd.2D 
+b → Vn.2D
+c → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then element1 = FPNeg(element1);
+    Elem[result, e, esize] = FPMulAdd(Elem[operand3, e, esize], element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point fused Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, negates the product, adds the result to the corresponding vector element of the destination SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

FMLS Vd.2S,Vn.2S,Vm.S[lane]
+

Argument Preparation

a → Vd.2S 
+b → Vn.2S
+v → Vm.2S
+0 <= lane <= 1

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then element1 = FPNeg(element1);
+    Elem[result, e, esize] = FPMulAdd(Elem[operand3, e, esize], element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point fused Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, negates the product, adds the result to the corresponding vector element of the destination SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

FMLS Vd.4S,Vn.4S,Vm.S[lane]
+

Argument Preparation

a → Vd.4S 
+b → Vn.4S
+v → Vm.2S
+0 <= lane <= 1

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then element1 = FPNeg(element1);
+    Elem[result, e, esize] = FPMulAdd(Elem[operand3, e, esize], element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point fused Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, negates the product, adds the result to the corresponding vector element of the destination SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

FMLS Dd,Dn,Vm.D[lane]
+

Argument Preparation

a → Dd 
+b → Dn
+v → Vm.1D
+0 <= lane <= 0

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then element1 = FPNeg(element1);
+    Elem[result, e, esize] = FPMulAdd(Elem[operand3, e, esize], element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point fused Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, negates the product, adds the result to the corresponding vector element of the destination SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

FMLS Vd.2D,Vn.2D,Vm.D[lane]
+

Argument Preparation

a → Vd.2D 
+b → Vn.2D
+v → Vm.1D
+0 <= lane <= 0

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then element1 = FPNeg(element1);
+    Elem[result, e, esize] = FPMulAdd(Elem[operand3, e, esize], element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point fused Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, negates the product, adds the result to the corresponding vector element of the destination SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

FMLS Sd,Sn,Vm.S[lane]
+

Argument Preparation

a → Sd 
+b → Sn
+v → Vm.2S
+0 <= lane <= 1

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then element1 = FPNeg(element1);
+    Elem[result, e, esize] = FPMulAdd(Elem[operand3, e, esize], element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point fused Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, negates the product, adds the result to the corresponding vector element of the destination SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

FMLS Dd,Dn,Vm.D[lane]
+

Argument Preparation

a → Dd 
+b → Dn
+v → Vm.1D
+0 <= lane <= 0

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then element1 = FPNeg(element1);
+    Elem[result, e, esize] = FPMulAdd(Elem[operand3, e, esize], element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point fused Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, negates the product, adds the result to the corresponding vector element of the destination SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

FMLS Vd.2S,Vn.2S,Vm.S[lane]
+

Argument Preparation

a → Vd.2S 
+b → Vn.2S
+v → Vm.4S
+0 <= lane <= 3

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then element1 = FPNeg(element1);
+    Elem[result, e, esize] = FPMulAdd(Elem[operand3, e, esize], element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point fused Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, negates the product, adds the result to the corresponding vector element of the destination SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

FMLS Vd.4S,Vn.4S,Vm.S[lane]
+

Argument Preparation

a → Vd.4S 
+b → Vn.4S
+v → Vm.4S
+0 <= lane <= 3

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then element1 = FPNeg(element1);
+    Elem[result, e, esize] = FPMulAdd(Elem[operand3, e, esize], element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point fused Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, negates the product, adds the result to the corresponding vector element of the destination SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

FMLS Dd,Dn,Vm.D[lane]
+

Argument Preparation

a → Dd 
+b → Dn
+v → Vm.2D
+0 <= lane <= 1

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then element1 = FPNeg(element1);
+    Elem[result, e, esize] = FPMulAdd(Elem[operand3, e, esize], element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point fused Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, negates the product, adds the result to the corresponding vector element of the destination SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

FMLS Vd.2D,Vn.2D,Vm.D[lane]
+

Argument Preparation

a → Vd.2D 
+b → Vn.2D
+v → Vm.2D
+0 <= lane <= 1

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then element1 = FPNeg(element1);
+    Elem[result, e, esize] = FPMulAdd(Elem[operand3, e, esize], element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point fused Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, negates the product, adds the result to the corresponding vector element of the destination SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

FMLS Sd,Sn,Vm.S[lane]
+

Argument Preparation

a → Sd 
+b → Sn
+v → Vm.4S
+0 <= lane <= 3

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then element1 = FPNeg(element1);
+    Elem[result, e, esize] = FPMulAdd(Elem[operand3, e, esize], element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point fused Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, negates the product, adds the result to the corresponding vector element of the destination SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

FMLS Dd,Dn,Vm.D[lane]
+

Argument Preparation

a → Dd 
+b → Dn
+v → Vm.2D
+0 <= lane <= 1

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then element1 = FPNeg(element1);
+    Elem[result, e, esize] = FPMulAdd(Elem[operand3, e, esize], element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQDMULH Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 << (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (Elem[result, e, esize], sat) = SignedSatQ(product >> esize, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
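
A hedged scalar model of SQDMULH in Rust (illustrative; the FPSR.QC flag is not modelled): double the signed product, take the high 16 bits, and saturate. Only the -32768 * -32768 case actually needs the saturation step:

// Scalar sketch of SQDMULH Vd.4H,Vn.4H,Vm.4H.
fn sqdmulh_i16(a: [i16; 4], b: [i16; 4]) -> [i16; 4] {
    let mut r = [0i16; 4];
    for e in 0..4 {
        let product = 2 * i64::from(a[e]) * i64::from(b[e]); // no round_const here
        let high = product >> 16; // drop the low half
        r[e] = high.clamp(i64::from(i16::MIN), i64::from(i16::MAX)) as i16;
    }
    r
}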

Description

Signed saturating Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQDMULH Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 << (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (Elem[result, e, esize], sat) = SignedSatQ(product >> esize, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQDMULH Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 << (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (Elem[result, e, esize], sat) = SignedSatQ(product >> esize, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQDMULH Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 << (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (Elem[result, e, esize], sat) = SignedSatQ(product >> esize, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQDMULH Hd,Hn,Hm
+

Argument Preparation

a → Hn 
+b → Hm

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 << (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (Elem[result, e, esize], sat) = SignedSatQ(product >> esize, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQDMULH Sd,Sn,Sm
+

Argument Preparation

a → Sn 
+b → Sm

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 << (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (Elem[result, e, esize], sat) = SignedSatQ(product >> esize, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Rounding Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQRDMULH Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 << (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (Elem[result, e, esize], sat) = SignedSatQ(product >> esize, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
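
The rounding variant differs from SQDMULH only in the round_const term of the pseudocode. A hedged scalar sketch in Rust (illustrative; QC flag not modelled):

// Scalar sketch of SQRDMULH Vd.4H,Vn.4H,Vm.4H: add 1 << (esize-1) before taking the high half.
fn sqrdmulh_i16(a: [i16; 4], b: [i16; 4]) -> [i16; 4] {
    let mut r = [0i16; 4];
    for e in 0..4 {
        let product = 2 * i64::from(a[e]) * i64::from(b[e]) + (1_i64 << 15);
        r[e] = (product >> 16).clamp(i64::from(i16::MIN), i64::from(i16::MAX)) as i16;
    }
    r
}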

Description

Signed saturating Rounding Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQRDMULH Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 << (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (Elem[result, e, esize], sat) = SignedSatQ(product >> esize, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Rounding Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQRDMULH Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 << (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (Elem[result, e, esize], sat) = SignedSatQ(product >> esize, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Rounding Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQRDMULH Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 << (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (Elem[result, e, esize], sat) = SignedSatQ(product >> esize, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Rounding Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQRDMULH Hd,Hn,Hm
+

Argument Preparation

a → Hn 
+b → Hm

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 << (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (Elem[result, e, esize], sat) = SignedSatQ(product >> esize, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Rounding Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQRDMULH Sd,Sn,Sm
+

Argument Preparation

a → Sn 
+b → Sm

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 << (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (Elem[result, e, esize], sat) = SignedSatQ(product >> esize, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply-Add Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, doubles the results, and accumulates the final results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SQDMLAL Vd.4S,Vn.4H,Vm.4H
+

Argument Preparation

a → Vd.4S 
+b → Vn.4H
+c → Vm.4H

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat1) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = SInt(Elem[operand3, e, 2*esize]) - SInt(product);
+    else
+        accum = SInt(Elem[operand3, e, 2*esize]) + SInt(product);
+    (Elem[result, e, 2*esize], sat2) = SignedSatQ(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
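
A hedged scalar model of SQDMLAL in Rust (illustrative; the QC flag raised by either saturation step is not modelled): the doubled 16x16 product saturates to 32 bits, and the accumulation into the 32-bit destination lane saturates as well:

// Scalar sketch of SQDMLAL Vd.4S,Vn.4H,Vm.4H.
fn sqdmlal_i16(acc: &mut [i32; 4], b: [i16; 4], c: [i16; 4]) {
    for e in 0..4 {
        let doubled = 2 * i64::from(b[e]) * i64::from(c[e]);
        let product = doubled.clamp(i64::from(i32::MIN), i64::from(i32::MAX)) as i32;
        acc[e] = acc[e].saturating_add(product);
    }
}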

Description

Signed saturating Doubling Multiply-Add Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, doubles the results, and accumulates the final results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SQDMLAL Vd.2D,Vn.2S,Vm.2S
+

Argument Preparation

a → Vd.2D 
+b → Vn.2S
+c → Vm.2S

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat1) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = SInt(Elem[operand3, e, 2*esize]) - SInt(product);
+    else
+        accum = SInt(Elem[operand3, e, 2*esize]) + SInt(product);
+    (Elem[result, e, 2*esize], sat2) = SignedSatQ(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Doubling Multiply-Add Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, doubles the results, and accumulates the final results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SQDMLAL Sd,Hn,Hm
+

Argument Preparation

a → Sd 
+b → Hn
+c → Hm

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat1) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = SInt(Elem[operand3, e, 2*esize]) - SInt(product);
+    else
+        accum = SInt(Elem[operand3, e, 2*esize]) + SInt(product);
+    (Elem[result, e, 2*esize], sat2) = SignedSatQ(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply-Add Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, doubles the results, and accumulates the final results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SQDMLAL Dd,Sn,Sm
+

Argument Preparation

a → Dd 
+b → Sn
+c → Sm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat1) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = SInt(Elem[operand3, e, 2*esize]) - SInt(product);
+    else
+        accum = SInt(Elem[operand3, e, 2*esize]) + SInt(product);
+    (Elem[result, e, 2*esize], sat2) = SignedSatQ(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply-Add Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, doubles the results, and accumulates the final results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SQDMLAL2 Vd.4S,Vn.8H,Vm.8H
+

Argument Preparation

a → Vd.4S 
+b → Vn.8H
+c → Vm.8H

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat1) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = SInt(Elem[operand3, e, 2*esize]) - SInt(product);
+    else
+        accum = SInt(Elem[operand3, e, 2*esize]) + SInt(product);
+    (Elem[result, e, 2*esize], sat2) = SignedSatQ(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply-Add Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, doubles the results, and accumulates the final results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SQDMLAL2 Vd.2D,Vn.4S,Vm.4S
+

Argument Preparation

a → Vd.2D 
+b → Vn.4S
+c → Vm.4S

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat1) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = SInt(Elem[operand3, e, 2*esize]) - SInt(product);
+    else
+        accum = SInt(Elem[operand3, e, 2*esize]) + SInt(product);
+    (Elem[result, e, 2*esize], sat2) = SignedSatQ(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64
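
A rough Rust sketch of one SQDMLAL/SQDMLAL2 lane as described above (i16 source lanes, i32 accumulator assumed; the function name is illustrative, not a stdarch intrinsic):

// Saturating doubling multiply, then saturating accumulate, mirroring the
// Operation pseudocode: product = SatQ(2*a*b), result = SatQ(acc + product).
// FPSR.QC (the sticky saturation flag) is not modelled; the sketch just clamps.
fn sqdmlal_lane(acc: i32, a: i16, b: i16) -> i32 {
    let product = (2 * a as i64 * b as i64)
        .clamp(i32::MIN as i64, i32::MAX as i64) as i32;
    acc.saturating_add(product)
}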

Description

Signed saturating Doubling Multiply-Subtract Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, doubles the results, and subtracts the final results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SQDMLSL Vd.4S,Vn.4H,Vm.4H
+

Argument Preparation

a → Vd.4S 
+b → Vn.4H
+c → Vm.4H

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat1) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = SInt(Elem[operand3, e, 2*esize]) - SInt(product);
+    else
+        accum = SInt(Elem[operand3, e, 2*esize]) + SInt(product);
+    (Elem[result, e, 2*esize], sat2) = SignedSatQ(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Doubling Multiply-Subtract Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, doubles the results, and subtracts the final results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SQDMLSL Vd.2D,Vn.2S,Vm.2S
+

Argument Preparation

a → Vd.2D 
+b → Vn.2S
+c → Vm.2S

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat1) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = SInt(Elem[operand3, e, 2*esize]) - SInt(product);
+    else
+        accum = SInt(Elem[operand3, e, 2*esize]) + SInt(product);
+    (Elem[result, e, 2*esize], sat2) = SignedSatQ(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Doubling Multiply-Subtract Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, doubles the results, and subtracts the final results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SQDMLSL Sd,Hn,Hm
+

Argument Preparation

a → Sd 
+b → Hn
+c → Hm

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat1) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = SInt(Elem[operand3, e, 2*esize]) - SInt(product);
+    else
+        accum = SInt(Elem[operand3, e, 2*esize]) + SInt(product);
+    (Elem[result, e, 2*esize], sat2) = SignedSatQ(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply-Subtract Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, doubles the results, and subtracts the final results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SQDMLSL Dd,Sn,Sm
+

Argument Preparation

a → Dd 
+b → Sn
+c → Sm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat1) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = SInt(Elem[operand3, e, 2*esize]) - SInt(product);
+    else
+        accum = SInt(Elem[operand3, e, 2*esize]) + SInt(product);
+    (Elem[result, e, 2*esize], sat2) = SignedSatQ(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply-Subtract Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, doubles the results, and subtracts the final results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SQDMLSL2 Vd.4S,Vn.8H,Vm.8H
+

Argument Preparation

a → Vd.4S 
+b → Vn.8H
+c → Vm.8H

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat1) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = SInt(Elem[operand3, e, 2*esize]) - SInt(product);
+    else
+        accum = SInt(Elem[operand3, e, 2*esize]) + SInt(product);
+    (Elem[result, e, 2*esize], sat2) = SignedSatQ(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply-Subtract Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, doubles the results, and subtracts the final results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SQDMLSL2 Vd.2D,Vn.4S,Vm.4S
+

Argument Preparation

a → Vd.2D 
+b → Vn.4S
+c → Vm.4S

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat1) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = SInt(Elem[operand3, e, 2*esize]) - SInt(product);
+    else
+        accum = SInt(Elem[operand3, e, 2*esize]) + SInt(product);
+    (Elem[result, e, 2*esize], sat2) = SignedSatQ(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64
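
The SQDMLSL/SQDMLSL2 forms differ from SQDMLAL only in the accumulation step; a matching Rust sketch (illustrative name, i16 lanes assumed):

// product = SatQ(2*a*b), result = SatQ(acc - product).
fn sqdmlsl_lane(acc: i32, a: i16, b: i16) -> i32 {
    let product = (2 * a as i64 * b as i64)
        .clamp(i32::MIN as i64, i32::MAX as i64) as i32;
    acc.saturating_sub(product)
}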

Description

Signed Multiply Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SMULL Vd.8H,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    Elem[result, e, 2*esize] = (element1*element2)<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Multiply Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SMULL Vd.4S,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    Elem[result, e, 2*esize] = (element1*element2)<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Multiply Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SMULL Vd.2D,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    Elem[result, e, 2*esize] = (element1*element2)<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
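
A minimal Rust sketch of one SMULL lane as described above (i16 sources widened to i32; illustrative, not the stdarch intrinsic):

// Widening signed multiply: the i32 product of two i16 values cannot overflow.
fn smull_lane(a: i16, b: i16) -> i32 {
    a as i32 * b as i32
}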

Description

Unsigned Multiply Long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, places the result in a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UMULL Vd.8H,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    Elem[result, e, 2*esize] = (element1*element2)<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Multiply Long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, places the result in a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UMULL Vd.4S,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    Elem[result, e, 2*esize] = (element1*element2)<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Multiply Long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, places the result in a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UMULL Vd.2D,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    Elem[result, e, 2*esize] = (element1*element2)<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
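
The unsigned counterpart, sketched the same way (u16 sources widened to u32; illustrative name):

// Widening unsigned multiply: the u32 product of two u16 values cannot overflow.
fn umull_lane(a: u16, b: u16) -> u32 {
    a as u32 * b as u32
}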

Description

Polynomial Multiply Long. This instruction multiplies corresponding elements in the lower or upper half of the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

PMULL Vd.8H,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    Elem[result, e, 2*esize] = PolynomialMult(element1, element2);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
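
PolynomialMult in the Operation above is a carry-less (GF(2)) multiply; a small Rust sketch of one 8-bit lane (illustrative name):

// XOR-accumulate shifted copies of `a` for each set bit of `b`,
// i.e. multiplication of polynomials over GF(2).
fn pmull_lane(a: u8, b: u8) -> u16 {
    let mut acc = 0u16;
    for i in 0..8 {
        if (b >> i) & 1 == 1 {
            acc ^= (a as u16) << i;
        }
    }
    acc
}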

Description

Signed Multiply Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SMULL2 Vd.8H,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    Elem[result, e, 2*esize] = (element1*element2)<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Multiply Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SMULL2 Vd.4S,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    Elem[result, e, 2*esize] = (element1*element2)<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Multiply Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SMULL2 Vd.2D,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    Elem[result, e, 2*esize] = (element1*element2)<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64
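
The "2" variants read the upper half of each 128-bit source; a Rust sketch of SMULL2 on i8 lanes (illustrative, fixed lane counts assumed):

fn smull2(a: [i8; 16], b: [i8; 16]) -> [i16; 8] {
    let mut out = [0i16; 8];
    for e in 0..8 {
        // Vpart[n, part] with part = 1 selects lanes 8..15.
        out[e] = a[8 + e] as i16 * b[8 + e] as i16;
    }
    out
}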

Description

Unsigned Multiply Long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, places the result in a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UMULL2 Vd.8H,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    Elem[result, e, 2*esize] = (element1*element2)<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Multiply Long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, places the result in a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UMULL2 Vd.4S,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    Elem[result, e, 2*esize] = (element1*element2)<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Multiply Long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, places the result in a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UMULL2 Vd.2D,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    Elem[result, e, 2*esize] = (element1*element2)<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Polynomial Multiply Long. This instruction multiplies corresponding elements in the lower or upper half of the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

PMULL2 Vd.8H,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    Elem[result, e, 2*esize] = PolynomialMult(element1, element2);
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply Long. This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, doubles the results, places the final results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQDMULL Vd.4S,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    Elem[result, e, 2*esize] = product;
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Doubling Multiply Long. This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, doubles the results, places the final results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQDMULL Vd.2D,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    Elem[result, e, 2*esize] = product;
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Doubling Multiply Long. This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, doubles the results, places the final results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQDMULL Sd,Hn,Hm
+

Argument Preparation

a → Hn 
+b → Hm

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    Elem[result, e, 2*esize] = product;
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply Long. This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, doubles the results, places the final results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQDMULL Dd,Sn,Sm
+

Argument Preparation

a → Sn 
+b → Sm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    Elem[result, e, 2*esize] = product;
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply Long. This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, doubles the results, places the final results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQDMULL2 Vd.4S,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    Elem[result, e, 2*esize] = product;
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply Long. This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, doubles the results, places the final results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQDMULL2 Vd.2D,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    Elem[result, e, 2*esize] = product;
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64
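
SQDMULL/SQDMULL2 are the accumulate-free form of the doubling multiply; one lane in Rust (illustrative name, i16 lanes assumed):

// product = SatQ(2*a*b); only i16::MIN * i16::MIN actually saturates here.
fn sqdmull_lane(a: i16, b: i16) -> i32 {
    (2 * a as i64 * b as i64)
        .clamp(i32::MIN as i64, i32::MAX as i64) as i32
}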

Description

Subtract (vector). This instruction subtracts each vector element in the second source SIMD&FP register from the corresponding vector element in the first source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SUB Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then
+        Elem[result, e, esize] = element1 - element2;
+    else
+        Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Subtract (vector). This instruction subtracts each vector element in the second source SIMD&FP register from the corresponding vector element in the first source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SUB Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then
+        Elem[result, e, esize] = element1 - element2;
+    else
+        Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Subtract (vector). This instruction subtracts each vector element in the second source SIMD&FP register from the corresponding vector element in the first source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SUB Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then
+        Elem[result, e, esize] = element1 - element2;
+    else
+        Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Subtract (vector). This instruction subtracts each vector element in the second source SIMD&FP register from the corresponding vector element in the first source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SUB Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then
+        Elem[result, e, esize] = element1 - element2;
+    else
+        Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Subtract (vector). This instruction subtracts each vector element in the second source SIMD&FP register from the corresponding vector element in the first source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SUB Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then
+        Elem[result, e, esize] = element1 - element2;
+    else
+        Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Subtract (vector). This instruction subtracts each vector element in the second source SIMD&FP register from the corresponding vector element in the first source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SUB Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then
+        Elem[result, e, esize] = element1 - element2;
+    else
+        Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Subtract (vector). This instruction subtracts each vector element in the second source SIMD&FP register from the corresponding vector element in the first source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SUB Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then
+        Elem[result, e, esize] = element1 - element2;
+    else
+        Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Subtract (vector). This instruction subtracts each vector element in the second source SIMD&FP register from the corresponding vector element in the first source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SUB Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then
+        Elem[result, e, esize] = element1 - element2;
+    else
+        Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Subtract (vector). This instruction subtracts each vector element in the second source SIMD&FP register from the corresponding vector element in the first source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SUB Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then
+        Elem[result, e, esize] = element1 - element2;
+    else
+        Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Subtract (vector). This instruction subtracts each vector element in the second source SIMD&FP register from the corresponding vector element in the first source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SUB Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then
+        Elem[result, e, esize] = element1 - element2;
+    else
+        Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Subtract (vector). This instruction subtracts each vector element in the second source SIMD&FP register from the corresponding vector element in the first source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SUB Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then
+        Elem[result, e, esize] = element1 - element2;
+    else
+        Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Subtract (vector). This instruction subtracts each vector element in the second source SIMD&FP register from the corresponding vector element in the first source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SUB Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then
+        Elem[result, e, esize] = element1 - element2;
+    else
+        Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Subtract (vector). This instruction subtracts each vector element in the second source SIMD&FP register from the corresponding vector element in the first source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SUB Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then
+        Elem[result, e, esize] = element1 - element2;
+    else
+        Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Subtract (vector). This instruction subtracts each vector element in the second source SIMD&FP register from the corresponding vector element in the first source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SUB Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then
+        Elem[result, e, esize] = element1 - element2;
+    else
+        Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Subtract (vector). This instruction subtracts each vector element in the second source SIMD&FP register from the corresponding vector element in the first source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SUB Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then
+        Elem[result, e, esize] = element1 - element2;
+    else
+        Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Subtract (vector). This instruction subtracts each vector element in the second source SIMD&FP register from the corresponding vector element in the first source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SUB Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then
+        Elem[result, e, esize] = element1 - element2;
+    else
+        Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
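
SUB is a plain modular (wrapping) element-wise subtraction; a Rust sketch over four i32 lanes (illustrative lane count):

fn sub_lanes(a: [i32; 4], b: [i32; 4]) -> [i32; 4] {
    let mut out = [0i32; 4];
    for e in 0..4 {
        out[e] = a[e].wrapping_sub(b[e]); // no saturation, wraps on overflow
    }
    out
}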

Description

Floating-point Subtract (vector). This instruction subtracts the elements in the vector in the second source SIMD&FP register, from the corresponding elements in the vector in the first source SIMD&FP register, places each result into elements of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FSUB Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) diff;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    diff = FPSub(element1, element2, FPCR);
+    Elem[result, e, esize] = if abs then FPAbs(diff) else diff;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Floating-point Subtract (vector). This instruction subtracts the elements in the vector in the second source SIMD&FP register, from the corresponding elements in the vector in the first source SIMD&FP register, places each result into elements of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FSUB Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) diff;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    diff = FPSub(element1, element2, FPCR);
+    Elem[result, e, esize] = if abs then FPAbs(diff) else diff;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Floating-point Subtract (vector). This instruction subtracts the elements in the vector in the second source SIMD&FP register, from the corresponding elements in the vector in the first source SIMD&FP register, places each result into elements of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FSUB Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) diff;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    diff = FPSub(element1, element2, FPCR);
+    Elem[result, e, esize] = if abs then FPAbs(diff) else diff;
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Subtract (vector). This instruction subtracts the elements in the vector in the second source SIMD&FP register, from the corresponding elements in the vector in the first source SIMD&FP register, places each result into elements of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FSUB Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) diff;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    diff = FPSub(element1, element2, FPCR);
+    Elem[result, e, esize] = if abs then FPAbs(diff) else diff;
+
+V[d] = result;
+

Supported architectures

A64
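
FSUB performs an IEEE 754 subtraction per lane under the rounding mode held in FPCR; a Rust sketch over f32 lanes (illustrative; Rust's `-` uses the default round-to-nearest-even mode):

fn fsub_lanes(a: [f32; 4], b: [f32; 4]) -> [f32; 4] {
    let mut out = [0.0f32; 4];
    for e in 0..4 {
        out[e] = a[e] - b[e];
    }
    out
}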

Description

Subtract (vector). This instruction subtracts each vector element in the second source SIMD&FP register from the corresponding vector element in the first source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SUB Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then
+        Elem[result, e, esize] = element1 - element2;
+    else
+        Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

A64

Description

Subtract (vector). This instruction subtracts each vector element in the second source SIMD&FP register from the corresponding vector element in the first source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SUB Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then
+        Elem[result, e, esize] = element1 - element2;
+    else
+        Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Subtract Long. This instruction subtracts each vector element in the lower or upper half of the second source SIMD&FP register from the corresponding vector element of the first source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

SSUBL Vd.8H,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    Elem[result, e, 2*esize] = sum<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Subtract Long. This instruction subtracts each vector element in the lower or upper half of the second source SIMD&FP register from the corresponding vector element of the first source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

SSUBL Vd.4S,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    Elem[result, e, 2*esize] = sum<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Subtract Long. This instruction subtracts each vector element in the lower or upper half of the second source SIMD&FP register from the corresponding vector element of the first source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

SSUBL Vd.2D,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    Elem[result, e, 2*esize] = sum<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
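
SSUBL widens before subtracting, so the difference never wraps; one lane in Rust (illustrative, i16 sources widened to i32):

fn ssubl_lane(a: i16, b: i16) -> i32 {
    a as i32 - b as i32
}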

Description

Unsigned Subtract Long. This instruction subtracts each vector element in the lower or upper half of the second source SIMD&FP register from the corresponding vector element of the first source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

USUBL Vd.8H,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    Elem[result, e, 2*esize] = sum<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Subtract Long. This instruction subtracts each vector element in the lower or upper half of the second source SIMD&FP register from the corresponding vector element of the first source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

USUBL Vd.4S,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    Elem[result, e, 2*esize] = sum<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Subtract Long. This instruction subtracts each vector element in the lower or upper half of the second source SIMD&FP register from the corresponding vector element of the first source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

USUBL Vd.2D,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    Elem[result, e, 2*esize] = sum<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
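
For USUBL the operands are unsigned but the difference may be negative; the Operation keeps the low 2*esize bits, which corresponds to a wrapping subtraction in the widened type. A Rust sketch for u8 sources (illustrative name):

fn usubl_lane(a: u8, b: u8) -> u16 {
    (a as u16).wrapping_sub(b as u16) // difference taken modulo 2^16
}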

Description

Signed Subtract Long. This instruction subtracts each vector element in the lower or upper half of the second source SIMD&FP register from the corresponding vector element of the first source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

SSUBL2 Vd.8H,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    Elem[result, e, 2*esize] = sum<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Subtract Long. This instruction subtracts each vector element in the lower or upper half of the second source SIMD&FP register from the corresponding vector element of the first source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

SSUBL2 Vd.4S,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    Elem[result, e, 2*esize] = sum<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Subtract Long. This instruction subtracts each vector element in the lower or upper half of the second source SIMD&FP register from the corresponding vector element of the first source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

SSUBL2 Vd.2D,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    Elem[result, e, 2*esize] = sum<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Subtract Long. This instruction subtracts each vector element in the lower or upper half of the second source SIMD&FP register from the corresponding vector element of the first source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

USUBL2 Vd.8H,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    Elem[result, e, 2*esize] = sum<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64
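
A hedged usage sketch for the unsigned high form, assuming it is exposed as `vsubl_high_u8` as in ACLE.

#[cfg(target_arch = "aarch64")]
fn usubl2_demo() {
    use core::arch::aarch64::*;
    unsafe {
        let a: uint8x16_t = core::mem::transmute([0u8, 0, 0, 0, 0, 0, 0, 0, 200, 200, 200, 200, 200, 200, 200, 200]);
        let b: uint8x16_t = core::mem::transmute([50u8; 16]);
        // The upper eight u8 lanes are widened to u16 and subtracted lane-wise.
        let out: [u16; 8] = core::mem::transmute(vsubl_high_u8(a, b));
        assert_eq!(out, [150u16; 8]);
    }
}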

Description

Unsigned Subtract Long. This instruction subtracts each vector element in the lower or upper half of the second source SIMD&FP register from the corresponding vector element of the first source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

USUBL2 Vd.4S,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    Elem[result, e, 2*esize] = sum<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Subtract Long. This instruction subtracts each vector element in the lower or upper half of the second source SIMD&FP register from the corresponding vector element of the first source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

USUBL2 Vd.2D,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    Elem[result, e, 2*esize] = sum<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Subtract Wide. This instruction subtracts each vector element in the lower or upper half of the second source SIMD&FP register from the corresponding vector element in the first source SIMD&FP register, places the result in a vector, and writes the vector to the SIMD&FP destination register. All the values in this instruction are signed integer values.

+

A64 Instruction

SSUBW Vd.8H,Vn.8H,Vm.8B
+

Argument Preparation

a → Vn.8H 
+b → Vm.8B

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, 2*esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    Elem[result, e, 2*esize] = sum<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
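
For the wide form, only the second operand is narrow; a small sketch (not from the patch) using `vsubw_s8`, which is assumed to map to this encoding:

#[cfg(target_arch = "aarch64")]
fn ssubw_demo() {
    use core::arch::aarch64::*;
    unsafe {
        // The narrow i8 operand is sign-extended to i16 before the subtraction.
        let a: int16x8_t = core::mem::transmute([1000i16; 8]);
        let b: int8x8_t = core::mem::transmute([-100i8; 8]);
        let out: [i16; 8] = core::mem::transmute(vsubw_s8(a, b));
        assert_eq!(out, [1100i16; 8]);
    }
}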

Description

Signed Subtract Wide. This instruction subtracts each vector element in the lower or upper half of the second source SIMD&FP register from the corresponding vector element in the first source SIMD&FP register, places the result in a vector, and writes the vector to the SIMD&FP destination register. All the values in this instruction are signed integer values.

+

A64 Instruction

SSUBW Vd.4S,Vn.4S,Vm.4H
+

Argument Preparation

a → Vn.4S 
+b → Vm.4H

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, 2*esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    Elem[result, e, 2*esize] = sum<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Subtract Wide. This instruction subtracts each vector element in the lower or upper half of the second source SIMD&FP register from the corresponding vector element in the first source SIMD&FP register, places the result in a vector, and writes the vector to the SIMD&FP destination register. All the values in this instruction are signed integer values.

+

A64 Instruction

SSUBW Vd.2D,Vn.2D,Vm.2S
+

Argument Preparation

a → Vn.2D 
+b → Vm.2S

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, 2*esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    Elem[result, e, 2*esize] = sum<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Subtract Wide. This instruction subtracts each vector element in the lower or upper half of the second source SIMD&FP register from the corresponding vector element in the first source SIMD&FP register, places the result in a vector, and writes the vector to the SIMD&FP destination register. All the values in this instruction are unsigned integer values.

+

A64 Instruction

USUBW Vd.8H,Vn.8H,Vm.8B
+

Argument Preparation

a → Vn.8H 
+b → Vm.8B

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, 2*esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    Elem[result, e, 2*esize] = sum<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
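
The unsigned counterpart works the same way; an illustrative sketch using `vsubw_u8` (assumed to map to this encoding):

#[cfg(target_arch = "aarch64")]
fn usubw_demo() {
    use core::arch::aarch64::*;
    unsafe {
        // The u8 operand is zero-extended to u16, then subtracted from the wide operand.
        let a: uint16x8_t = core::mem::transmute([1000u16; 8]);
        let b: uint8x8_t = core::mem::transmute([255u8; 8]);
        let out: [u16; 8] = core::mem::transmute(vsubw_u8(a, b));
        assert_eq!(out, [745u16; 8]);
    }
}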

Description

Unsigned Subtract Wide. This instruction subtracts each vector element in the lower or upper half of the second source SIMD&FP register from the corresponding vector element in the first source SIMD&FP register, places the result in a vector, and writes the vector to the SIMD&FP destination register. All the values in this instruction are unsigned integer values.

+

A64 Instruction

USUBW Vd.4S,Vn.4S,Vm.4H
+

Argument Preparation

a → Vn.4S 
+b → Vm.4H

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, 2*esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    Elem[result, e, 2*esize] = sum<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Subtract Wide. This instruction subtracts each vector element in the lower or upper half of the second source SIMD&FP register from the corresponding vector element in the first source SIMD&FP register, places the result in a vector, and writes the vector to the SIMD&FP destination register. All the values in this instruction are unsigned integer values.

+

A64 Instruction

USUBW Vd.2D,Vn.2D,Vm.2S
+

Argument Preparation

a → Vn.2D 
+b → Vm.2S

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, 2*esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    Elem[result, e, 2*esize] = sum<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Subtract Wide. This instruction subtracts each vector element in the lower or upper half of the second source SIMD&FP register from the corresponding vector element in the first source SIMD&FP register, places the result in a vector, and writes the vector to the SIMD&FP destination register. All the values in this instruction are signed integer values.

+

A64 Instruction

SSUBW2 Vd.8H,Vn.8H,Vm.16B
+

Argument Preparation

a → Vn.8H 
+b → Vm.16B

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, 2*esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    Elem[result, e, 2*esize] = sum<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64
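
The wide high form takes the narrow lanes from the upper half of a 128-bit register; a hedged sketch assuming the intrinsic is `vsubw_high_s8`:

#[cfg(target_arch = "aarch64")]
fn ssubw2_demo() {
    use core::arch::aarch64::*;
    unsafe {
        let a: int16x8_t = core::mem::transmute([0i16; 8]);
        // Only the upper eight i8 lanes of `b` are used; they are sign-extended first.
        let b: int8x16_t = core::mem::transmute([0i8, 0, 0, 0, 0, 0, 0, 0, -128, -128, -128, -128, -128, -128, -128, -128]);
        let out: [i16; 8] = core::mem::transmute(vsubw_high_s8(a, b));
        assert_eq!(out, [128i16; 8]); // 0 - (-128) = 128, representable only after widening
    }
}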

Description

Signed Subtract Wide. This instruction subtracts each vector element in the lower or upper half of the second source SIMD&FP register from the corresponding vector element in the first source SIMD&FP register, places the result in a vector, and writes the vector to the SIMD&FP destination register. All the values in this instruction are signed integer values.

+

A64 Instruction

SSUBW2 Vd.4S,Vn.4S,Vm.8H
+

Argument Preparation

a → Vn.4S 
+b → Vm.8H

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, 2*esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    Elem[result, e, 2*esize] = sum<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Subtract Wide. This instruction subtracts each vector element in the lower or upper half of the second source SIMD&FP register from the corresponding vector element in the first source SIMD&FP register, places the result in a vector, and writes the vector to the SIMD&FP destination register. All the values in this instruction are signed integer values.

+

A64 Instruction

SSUBW2 Vd.2D,Vn.2D,Vm.4S
+

Argument Preparation

a → Vn.2D 
+b → Vm.4S

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, 2*esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    Elem[result, e, 2*esize] = sum<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Subtract Wide. This instruction subtracts each vector element in the lower or upper half of the second source SIMD&FP register from the corresponding vector element in the first source SIMD&FP register, places the result in a vector, and writes the vector to the SIMD&FP destination register. All the values in this instruction are unsigned integer values.

+

A64 Instruction

USUBW2 Vd.8H,Vn.8H,Vm.16B
+

Argument Preparation

a → Vn.8H 
+b → Vm.16B

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, 2*esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    Elem[result, e, 2*esize] = sum<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64
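
A matching sketch for the unsigned wide high form, assuming it is exposed as `vsubw_high_u8`:

#[cfg(target_arch = "aarch64")]
fn usubw2_demo() {
    use core::arch::aarch64::*;
    unsafe {
        let a: uint16x8_t = core::mem::transmute([300u16; 8]);
        // Only the upper eight u8 lanes of `b` take part, zero-extended to u16.
        let b: uint8x16_t = core::mem::transmute([0u8, 0, 0, 0, 0, 0, 0, 0, 255, 255, 255, 255, 255, 255, 255, 255]);
        let out: [u16; 8] = core::mem::transmute(vsubw_high_u8(a, b));
        assert_eq!(out, [45u16; 8]);
    }
}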

Description

Unsigned Subtract Wide. This instruction subtracts each vector element in the lower or upper half of the second source SIMD&FP register from the corresponding vector element in the first source SIMD&FP register, places the result in a vector, and writes the vector to the SIMD&FP destination register. All the values in this instruction are unsigned integer values.

+

A64 Instruction

USUBW2 Vd.4S,Vn.4S,Vm.8H
+

Argument Preparation

a → Vn.4S 
+b → Vm.8H

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, 2*esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    Elem[result, e, 2*esize] = sum<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Subtract Wide. This instruction subtracts each vector element in the lower or upper half of the second source SIMD&FP register from the corresponding vector element in the first source SIMD&FP register, places the result in a vector, and writes the vector to the SIMD&FP destination register. All the values in this instruction are unsigned integer values.

+

A64 Instruction

USUBW2 Vd.2D,Vn.2D,Vm.4S
+

Argument Preparation

a → Vn.2D 
+b → Vm.4S

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+integer sum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, 2*esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    Elem[result, e, 2*esize] = sum<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Halving Subtract. This instruction subtracts the elements in the vector in the second source SIMD&FP register from the corresponding elements in the vector in the first source SIMD&FP register, shifts each result right one bit, places each result into elements of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SHSUB Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    Elem[result, e, esize] = diff<esize:1>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
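
Because the difference is formed at full precision and only then halved, no intermediate overflow can occur; an illustrative sketch (not from the patch) using `vhsub_s8`:

#[cfg(target_arch = "aarch64")]
fn shsub_demo() {
    use core::arch::aarch64::*;
    unsafe {
        // 7 - (-8) = 15 is computed exactly, then halved to 7 in every lane.
        let a: int8x8_t = core::mem::transmute([7i8; 8]);
        let b: int8x8_t = core::mem::transmute([-8i8; 8]);
        let out: [i8; 8] = core::mem::transmute(vhsub_s8(a, b));
        assert_eq!(out, [7i8; 8]);
    }
}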

Description

Signed Halving Subtract. This instruction subtracts the elements in the vector in the second source SIMD&FP register from the corresponding elements in the vector in the first source SIMD&FP register, shifts each result right one bit, places each result into elements of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SHSUB Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    Elem[result, e, esize] = diff<esize:1>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Halving Subtract. This instruction subtracts the elements in the vector in the second source SIMD&FP register from the corresponding elements in the vector in the first source SIMD&FP register, shifts each result right one bit, places each result into elements of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SHSUB Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    Elem[result, e, esize] = diff<esize:1>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Halving Subtract. This instruction subtracts the elements in the vector in the second source SIMD&FP register from the corresponding elements in the vector in the first source SIMD&FP register, shifts each result right one bit, places each result into elements of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SHSUB Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    Elem[result, e, esize] = diff<esize:1>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Halving Subtract. This instruction subtracts the elements in the vector in the second source SIMD&FP register from the corresponding elements in the vector in the first source SIMD&FP register, shifts each result right one bit, places each result into elements of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SHSUB Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    Elem[result, e, esize] = diff<esize:1>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Halving Subtract. This instruction subtracts the elements in the vector in the second source SIMD&FP register from the corresponding elements in the vector in the first source SIMD&FP register, shifts each result right one bit, places each result into elements of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SHSUB Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    Elem[result, e, esize] = diff<esize:1>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Halving Subtract. This instruction subtracts the vector elements in the second source SIMD&FP register from the corresponding vector elements in the first source SIMD&FP register, shifts each result right one bit, places each result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UHSUB Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    Elem[result, e, esize] = diff<esize:1>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
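
A short sketch of the unsigned halving form via `vhsub_u8` (assumed mapping, not from the patch):

#[cfg(target_arch = "aarch64")]
fn uhsub_demo() {
    use core::arch::aarch64::*;
    unsafe {
        let a: uint8x8_t = core::mem::transmute([255u8; 8]);
        let b: uint8x8_t = core::mem::transmute([1u8; 8]);
        // (255 - 1) >> 1 = 127 in every lane.
        let out: [u8; 8] = core::mem::transmute(vhsub_u8(a, b));
        assert_eq!(out, [127u8; 8]);
    }
}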

Description

Unsigned Halving Subtract. This instruction subtracts the vector elements in the second source SIMD&FP register from the corresponding vector elements in the first source SIMD&FP register, shifts each result right one bit, places each result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UHSUB Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    Elem[result, e, esize] = diff<esize:1>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Halving Subtract. This instruction subtracts the vector elements in the second source SIMD&FP register from the corresponding vector elements in the first source SIMD&FP register, shifts each result right one bit, places each result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UHSUB Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    Elem[result, e, esize] = diff<esize:1>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Halving Subtract. This instruction subtracts the vector elements in the second source SIMD&FP register from the corresponding vector elements in the first source SIMD&FP register, shifts each result right one bit, places each result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UHSUB Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    Elem[result, e, esize] = diff<esize:1>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Halving Subtract. This instruction subtracts the vector elements in the second source SIMD&FP register from the corresponding vector elements in the first source SIMD&FP register, shifts each result right one bit, places each result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UHSUB Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    Elem[result, e, esize] = diff<esize:1>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Halving Subtract. This instruction subtracts the vector elements in the second source SIMD&FP register from the corresponding vector elements in the first source SIMD&FP register, shifts each result right one bit, places each result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UHSUB Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    Elem[result, e, esize] = diff<esize:1>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Subtract. This instruction subtracts the element values of the second source SIMD&FP register from the corresponding element values of the first source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQSUB Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    (Elem[result, e, esize], sat) = SatQ(diff, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
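
The point of the saturating form is that out-of-range results clamp instead of wrapping; a sketch assuming the intrinsic `vqsub_s8` maps to this encoding:

#[cfg(target_arch = "aarch64")]
fn sqsub_demo() {
    use core::arch::aarch64::*;
    unsafe {
        let a: int8x8_t = core::mem::transmute([-128i8, 100, 0, 0, 0, 0, 0, 0]);
        let b: int8x8_t = core::mem::transmute([1i8, -100, 0, 0, 0, 0, 0, 0]);
        // Lane 0 would underflow and lane 1 would overflow; both clamp to the i8 limits.
        let out: [i8; 8] = core::mem::transmute(vqsub_s8(a, b));
        assert_eq!(out, [-128i8, 127, 0, 0, 0, 0, 0, 0]);
    }
}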

Description

Signed saturating Subtract. This instruction subtracts the element values of the second source SIMD&FP register from the corresponding element values of the first source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQSUB Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    (Elem[result, e, esize], sat) = SatQ(diff, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Subtract. This instruction subtracts the element values of the second source SIMD&FP register from the corresponding element values of the first source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQSUB Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    (Elem[result, e, esize], sat) = SatQ(diff, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Subtract. This instruction subtracts the element values of the second source SIMD&FP register from the corresponding element values of the first source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQSUB Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    (Elem[result, e, esize], sat) = SatQ(diff, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Subtract. This instruction subtracts the element values of the second source SIMD&FP register from the corresponding element values of the first source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQSUB Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    (Elem[result, e, esize], sat) = SatQ(diff, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Subtract. This instruction subtracts the element values of the second source SIMD&FP register from the corresponding element values of the first source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQSUB Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    (Elem[result, e, esize], sat) = SatQ(diff, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Subtract. This instruction subtracts the element values of the second source SIMD&FP register from the corresponding element values of the first source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQSUB Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    (Elem[result, e, esize], sat) = SatQ(diff, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Subtract. This instruction subtracts the element values of the second source SIMD&FP register from the corresponding element values of the first source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQSUB Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    (Elem[result, e, esize], sat) = SatQ(diff, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned saturating Subtract. This instruction subtracts the element values of the second source SIMD&FP register from the corresponding element values of the first source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQSUB Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    (Elem[result, e, esize], sat) = SatQ(diff, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
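
The unsigned saturating form clamps at zero rather than wrapping around; an illustrative sketch via `vqsub_u8` (assumed mapping):

#[cfg(target_arch = "aarch64")]
fn uqsub_demo() {
    use core::arch::aarch64::*;
    unsafe {
        let a: uint8x8_t = core::mem::transmute([1u8, 200, 0, 0, 0, 0, 0, 0]);
        let b: uint8x8_t = core::mem::transmute([2u8, 100, 0, 0, 0, 0, 0, 0]);
        // 1 - 2 clamps to 0 instead of wrapping to 255.
        let out: [u8; 8] = core::mem::transmute(vqsub_u8(a, b));
        assert_eq!(out, [0u8, 100, 0, 0, 0, 0, 0, 0]);
    }
}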

Description

Unsigned saturating Subtract. This instruction subtracts the element values of the second source SIMD&FP register from the corresponding element values of the first source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQSUB Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    (Elem[result, e, esize], sat) = SatQ(diff, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned saturating Subtract. This instruction subtracts the element values of the second source SIMD&FP register from the corresponding element values of the first source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQSUB Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    (Elem[result, e, esize], sat) = SatQ(diff, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned saturating Subtract. This instruction subtracts the element values of the second source SIMD&FP register from the corresponding element values of the first source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQSUB Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    (Elem[result, e, esize], sat) = SatQ(diff, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned saturating Subtract. This instruction subtracts the element values of the second source SIMD&FP register from the corresponding element values of the first source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQSUB Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    (Elem[result, e, esize], sat) = SatQ(diff, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned saturating Subtract. This instruction subtracts the element values of the second source SIMD&FP register from the corresponding element values of the first source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQSUB Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    (Elem[result, e, esize], sat) = SatQ(diff, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned saturating Subtract. This instruction subtracts the element values of the second source SIMD&FP register from the corresponding element values of the first source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQSUB Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    (Elem[result, e, esize], sat) = SatQ(diff, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned saturating Subtract. This instruction subtracts the element values of the second source SIMD&FP register from the corresponding element values of the first source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQSUB Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    (Elem[result, e, esize], sat) = SatQ(diff, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Subtract. This instruction subtracts the element values of the second source SIMD&FP register from the corresponding element values of the first source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQSUB Bd,Bn,Bm
+

Argument Preparation

a → Bn 
+b → Bm

Results

Bd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    (Elem[result, e, esize], sat) = SatQ(diff, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64
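
Purely illustrative and more of an assumption than the vector forms above: ACLE names the scalar byte form `vqsubb_s8`, and if the crate exposes it under that name, a use could look like this.

#[cfg(target_arch = "aarch64")]
fn sqsub_scalar_demo() {
    use core::arch::aarch64::*;
    unsafe {
        // Scalar saturating subtract on a single byte value (assumed intrinsic name).
        assert_eq!(vqsubb_s8(-128, 1), -128); // clamps at the i8 minimum
        assert_eq!(vqsubb_s8(100, -50), 127); // clamps at the i8 maximum
    }
}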

Description

Signed saturating Subtract. This instruction subtracts the element values of the second source SIMD&FP register from the corresponding element values of the first source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQSUB Hd,Hn,Hm
+

Argument Preparation

a → Hn 
+b → Hm

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    (Elem[result, e, esize], sat) = SatQ(diff, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Subtract. This instruction subtracts the element values of the second source SIMD&FP register from the corresponding element values of the first source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQSUB Sd,Sn,Sm
+

Argument Preparation

a → Sn 
+b → Sm

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    (Elem[result, e, esize], sat) = SatQ(diff, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Subtract. This instruction subtracts the element values of the second source SIMD&FP register from the corresponding element values of the first source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQSUB Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    (Elem[result, e, esize], sat) = SatQ(diff, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned saturating Subtract. This instruction subtracts the element values of the second source SIMD&FP register from the corresponding element values of the first source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQSUB Bd,Bn,Bm
+

Argument Preparation

a → Bn 
+b → Bm

Results

Bd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    (Elem[result, e, esize], sat) = SatQ(diff, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned saturating Subtract. This instruction subtracts the element values of the second source SIMD&FP register from the corresponding element values of the first source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQSUB Hd,Hn,Hm
+

Argument Preparation

a → Hn 
+b → Hm

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    (Elem[result, e, esize], sat) = SatQ(diff, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned saturating Subtract. This instruction subtracts the element values of the second source SIMD&FP register from the corresponding element values of the first source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQSUB Sd,Sn,Sm
+

Argument Preparation

a → Sn 
+b → Sm

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    (Elem[result, e, esize], sat) = SatQ(diff, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned saturating Subtract. This instruction subtracts the element values of the second source SIMD&FP register from the corresponding element values of the first source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQSUB Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer diff;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    diff = element1 - element2;
+    (Elem[result, e, esize], sat) = SatQ(diff, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Subtract returning High Narrow. This instruction subtracts each vector element in the second source SIMD&FP register from the corresponding vector element in the first source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are signed integer values.

+

A64 Instruction

SUBHN Vd.8B,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(2*datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if round then 1 << (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, 2*esize];
+    element2 = Elem[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    Elem[result, e, esize] = sum<2*esize-1:esize>;
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64
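
The narrowing form keeps only the most significant half of each lane-wise difference; a sketch (not from the patch) using `vsubhn_s16`, which is assumed to map to this encoding:

#[cfg(target_arch = "aarch64")]
fn subhn_demo() {
    use core::arch::aarch64::*;
    unsafe {
        let a: int16x8_t = core::mem::transmute([0x1234i16; 8]);
        let b: int16x8_t = core::mem::transmute([0x0034i16; 8]);
        // The difference is 0x1200 per lane; only the high byte (0x12) is kept.
        let out: [i8; 8] = core::mem::transmute(vsubhn_s16(a, b));
        assert_eq!(out, [0x12i8; 8]);
    }
}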

Description

Subtract returning High Narrow. This instruction subtracts each vector element in the second source SIMD&FP register from the corresponding vector element in the first source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are signed integer values.

+

A64 Instruction

SUBHN Vd.4H,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(2*datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if round then 1 << (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, 2*esize];
+    element2 = Elem[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    Elem[result, e, esize] = sum<2*esize-1:esize>;
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Subtract returning High Narrow. This instruction subtracts each vector element in the second source SIMD&FP register from the corresponding vector element in the first source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are signed integer values.

+

A64 Instruction

SUBHN Vd.2S,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(2*datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if round then 1 << (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, 2*esize];
+    element2 = Elem[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    Elem[result, e, esize] = sum<2*esize-1:esize>;
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Subtract returning High Narrow. This instruction subtracts each vector element in the second source SIMD&FP register from the corresponding vector element in the first source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are signed integer values.

+

A64 Instruction

SUBHN Vd.8B,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(2*datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if round then 1 << (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, 2*esize];
+    element2 = Elem[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    Elem[result, e, esize] = sum<2*esize-1:esize>;
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Subtract returning High Narrow. This instruction subtracts each vector element in the second source SIMD&FP register from the corresponding vector element in the first source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are signed integer values.

+

A64 Instruction

SUBHN Vd.4H,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(2*datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if round then 1 << (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, 2*esize];
+    element2 = Elem[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    Elem[result, e, esize] = sum<2*esize-1:esize>;
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Subtract returning High Narrow. This instruction subtracts each vector element in the second source SIMD&FP register from the corresponding vector element in the first source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are signed integer values.

+

A64 Instruction

SUBHN Vd.2S,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(2*datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if round then 1 << (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, 2*esize];
+    element2 = Elem[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    Elem[result, e, esize] = sum<2*esize-1:esize>;
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Subtract returning High Narrow. This instruction subtracts each vector element in the second source SIMD&FP register from the corresponding vector element in the first source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are signed integer values.

+

A64 Instruction

SUBHN2 Vd.16B,Vn.8H,Vm.8H
+

Argument Preparation

r → Vd.8B 
+a → Vn.8H
+b → Vm.8H

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(2*datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if round then 1 << (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, 2*esize];
+    element2 = Elem[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    Elem[result, e, esize] = sum<2*esize-1:esize>;
+
+Vpart[d, part] = result;
+

Supported architectures

A64
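
The SUBHN2 form writes into the upper half of the destination while the lower half is supplied by an existing narrow vector, which is why the argument preparation above lists r alongside a and b. A minimal sketch, assuming std::arch::aarch64 exposes vsubhn_high_s16:

// Minimal sketch: SUBHN2 Vd.16B,Vn.8H,Vm.8H via vsubhn_high_s16 (assumed available
// in std::arch::aarch64). r fills lanes 0..7, the narrowed differences fill lanes 8..15.
#[cfg(target_arch = "aarch64")]
unsafe fn subhn2_demo() -> [i8; 16] {
    use std::arch::aarch64::*;
    let r = vdup_n_s8(7);        // existing low half of the result
    let a = vdupq_n_s16(0x0200);
    let b = vdupq_n_s16(0x0100); // 0x0200 - 0x0100 = 0x0100, high byte 0x01
    core::mem::transmute(vsubhn_high_s16(r, a, b)) // [7; 8] then [1; 8]
}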

Description

Subtract returning High Narrow. This instruction subtracts each vector element in the second source SIMD&FP register from the corresponding vector element in the first source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are signed integer values.

+

A64 Instruction

SUBHN2 Vd.8H,Vn.4S,Vm.4S
+

Argument Preparation

r → Vd.4H 
+a → Vn.4S
+b → Vm.4S

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(2*datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if round then 1 << (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, 2*esize];
+    element2 = Elem[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    Elem[result, e, esize] = sum<2*esize-1:esize>;
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Subtract returning High Narrow. This instruction subtracts each vector element in the second source SIMD&FP register from the corresponding vector element in the first source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are signed integer values.

+

A64 Instruction

SUBHN2 Vd.4S,Vn.2D,Vm.2D
+

Argument Preparation

r → Vd.2S 
+a → Vn.2D
+b → Vm.2D

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(2*datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if round then 1 << (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, 2*esize];
+    element2 = Elem[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    Elem[result, e, esize] = sum<2*esize-1:esize>;
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Subtract returning High Narrow. This instruction subtracts each vector element in the second source SIMD&FP register from the corresponding vector element in the first source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are signed integer values.

+

A64 Instruction

SUBHN2 Vd.16B,Vn.8H,Vm.8H
+

Argument Preparation

r → Vd.8B 
+a → Vn.8H
+b → Vm.8H

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(2*datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if round then 1 << (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, 2*esize];
+    element2 = Elem[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    Elem[result, e, esize] = sum<2*esize-1:esize>;
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Subtract returning High Narrow. This instruction subtracts each vector element in the second source SIMD&FP register from the corresponding vector element in the first source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are signed integer values.

+

A64 Instruction

SUBHN2 Vd.8H,Vn.4S,Vm.4S
+

Argument Preparation

r → Vd.4H 
+a → Vn.4S
+b → Vm.4S

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(2*datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if round then 1 << (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, 2*esize];
+    element2 = Elem[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    Elem[result, e, esize] = sum<2*esize-1:esize>;
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Subtract returning High Narrow. This instruction subtracts each vector element in the second source SIMD&FP register from the corresponding vector element in the first source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are signed integer values.

+

A64 Instruction

SUBHN2 Vd.4S,Vn.2D,Vm.2D
+

Argument Preparation

r → Vd.2S 
+a → Vn.2D
+b → Vm.2D

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(2*datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if round then 1 << (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, 2*esize];
+    element2 = Elem[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    Elem[result, e, esize] = sum<2*esize-1:esize>;
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Rounding Subtract returning High Narrow. This instruction subtracts each vector element of the second source SIMD&FP register from the corresponding vector element of the first source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register.

+

A64 Instruction

RSUBHN Vd.8B,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(2*datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if round then 1 << (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, 2*esize];
+    element2 = Elem[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    Elem[result, e, esize] = sum<2*esize-1:esize>;
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64
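
The rounding variant adds the round_const from the pseudocode, 1 << (esize - 1), before the high half is taken. A minimal sketch contrasting it with the truncating form, assuming std::arch::aarch64 exposes vsubhn_s16 and vrsubhn_s16:

// Minimal sketch: RSUBHN Vd.8B,Vn.8H,Vm.8H via vrsubhn_s16, next to the truncating
// SUBHN form (intrinsic availability in std::arch::aarch64 is assumed).
#[cfg(target_arch = "aarch64")]
unsafe fn rsubhn_demo() {
    use std::arch::aarch64::*;
    let a = vdupq_n_s16(0x0180);
    let b = vdupq_n_s16(0);
    let truncated: [i8; 8] = core::mem::transmute(vsubhn_s16(a, b));
    let rounded: [i8; 8] = core::mem::transmute(vrsubhn_s16(a, b));
    assert_eq!(truncated, [1; 8]); // 0x0180 >> 8 == 1
    assert_eq!(rounded, [2; 8]);   // (0x0180 + 0x80) >> 8 == 2
}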

Description

Rounding Subtract returning High Narrow. This instruction subtracts each vector element of the second source SIMD&FP register from the corresponding vector element of the first source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register.

+

A64 Instruction

RSUBHN Vd.4H,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(2*datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if round then 1 << (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, 2*esize];
+    element2 = Elem[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    Elem[result, e, esize] = sum<2*esize-1:esize>;
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Rounding Subtract returning High Narrow. This instruction subtracts each vector element of the second source SIMD&FP register from the corresponding vector element of the first source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register.

+

A64 Instruction

RSUBHN Vd.2S,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(2*datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if round then 1 << (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, 2*esize];
+    element2 = Elem[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    Elem[result, e, esize] = sum<2*esize-1:esize>;
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Rounding Subtract returning High Narrow. This instruction subtracts each vector element of the second source SIMD&FP register from the corresponding vector element of the first source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register.

+

A64 Instruction

RSUBHN Vd.8B,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(2*datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if round then 1 << (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, 2*esize];
+    element2 = Elem[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    Elem[result, e, esize] = sum<2*esize-1:esize>;
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Rounding Subtract returning High Narrow. This instruction subtracts each vector element of the second source SIMD&FP register from the corresponding vector element of the first source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register.

+

A64 Instruction

RSUBHN Vd.4H,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(2*datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if round then 1 << (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, 2*esize];
+    element2 = Elem[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    Elem[result, e, esize] = sum<2*esize-1:esize>;
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Rounding Subtract returning High Narrow. This instruction subtracts each vector element of the second source SIMD&FP register from the corresponding vector element of the first source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register.

+

A64 Instruction

RSUBHN Vd.2S,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(2*datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if round then 1 << (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, 2*esize];
+    element2 = Elem[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    Elem[result, e, esize] = sum<2*esize-1:esize>;
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Rounding Subtract returning High Narrow. This instruction subtracts each vector element of the second source SIMD&FP register from the corresponding vector element of the first source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register.

+

A64 Instruction

RSUBHN2 Vd.16B,Vn.8H,Vm.8H
+

Argument Preparation

r → Vd.8B 
+a → Vn.8H
+b → Vm.8H

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(2*datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if round then 1 << (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, 2*esize];
+    element2 = Elem[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    Elem[result, e, esize] = sum<2*esize-1:esize>;
+
+Vpart[d, part] = result;
+

Supported architectures

A64
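
As with SUBHN2, the RSUBHN2 form keeps an existing narrow vector in the lower half of the destination and writes the rounded high-half differences to the upper half. A minimal sketch, assuming std::arch::aarch64 exposes vrsubhn_high_s16:

// Minimal sketch: RSUBHN2 Vd.16B,Vn.8H,Vm.8H via vrsubhn_high_s16 (assumed available
// in std::arch::aarch64).
#[cfg(target_arch = "aarch64")]
unsafe fn rsubhn2_demo() -> [i8; 16] {
    use std::arch::aarch64::*;
    let r = vdup_n_s8(0);        // lanes 0..7 of the result
    let a = vdupq_n_s16(0x01C0); // 0x01C0 + 0x80 = 0x0240, high byte 0x02
    let b = vdupq_n_s16(0);
    core::mem::transmute(vrsubhn_high_s16(r, a, b)) // [0; 8] then [2; 8]
}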

Description

Rounding Subtract returning High Narrow. This instruction subtracts each vector element of the second source SIMD&FP register from the corresponding vector element of the first source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register.

+

A64 Instruction

RSUBHN2 Vd.8H,Vn.4S,Vm.4S
+

Argument Preparation

r → Vd.4H 
+a → Vn.4S
+b → Vm.4S

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(2*datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if round then 1 << (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, 2*esize];
+    element2 = Elem[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    Elem[result, e, esize] = sum<2*esize-1:esize>;
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Rounding Subtract returning High Narrow. This instruction subtracts each vector element of the second source SIMD&FP register from the corresponding vector element of the first source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register.

+

A64 Instruction

RSUBHN2 Vd.4S,Vn.2D,Vm.2D
+

Argument Preparation

r → Vd.2S 
+a → Vn.2D
+b → Vm.2D

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(2*datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if round then 1 << (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, 2*esize];
+    element2 = Elem[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    Elem[result, e, esize] = sum<2*esize-1:esize>;
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Rounding Subtract returning High Narrow. This instruction subtracts each vector element of the second source SIMD&FP register from the corresponding vector element of the first source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register.

+

A64 Instruction

RSUBHN2 Vd.16B,Vn.8H,Vm.8H
+

Argument Preparation

r → Vd.8B 
+a → Vn.8H
+b → Vm.8H

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(2*datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if round then 1 << (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, 2*esize];
+    element2 = Elem[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    Elem[result, e, esize] = sum<2*esize-1:esize>;
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Rounding Subtract returning High Narrow. This instruction subtracts each vector element of the second source SIMD&FP register from the corresponding vector element of the first source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register.

+

A64 Instruction

RSUBHN2 Vd.8H,Vn.4S,Vm.4S
+

Argument Preparation

r → Vd.4H 
+a → Vn.4S
+b → Vm.4S

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(2*datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if round then 1 << (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, 2*esize];
+    element2 = Elem[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    Elem[result, e, esize] = sum<2*esize-1:esize>;
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Rounding Subtract returning High Narrow. This instruction subtracts each vector element of the second source SIMD&FP register from the corresponding vector element of the first source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register.

+

A64 Instruction

RSUBHN2 Vd.4S,Vn.2D,Vm.2D
+

Argument Preparation

r → Vd.2S 
+a → Vn.2D
+b → Vm.2D

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand1 = V[n];
+bits(2*datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if round then 1 << (esize - 1) else 0;
+bits(2*esize) element1;
+bits(2*esize) element2;
+bits(2*esize) sum;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, 2*esize];
+    element2 = Elem[operand2, e, 2*esize];
+    if sub_op then
+        sum = element1 - element2;
+    else
+        sum = element1 + element2;
+    sum = sum + round_const;
+    Elem[result, e, esize] = sum<2*esize-1:esize>;
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Compare bitwise Equal (vector). This instruction compares each vector element from the first source SIMD&FP register with the corresponding vector element from the second source SIMD&FP register, and if the comparison is equal sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMEQ Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
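
In Rust this two-register byte compare corresponds to the vceq_* family, for example vceq_s8, which returns an unsigned mask vector. A minimal sketch, assuming std::arch::aarch64 exposes vceq_s8 and that the run-time detection macro is reachable as std::arch::is_aarch64_feature_detected (its exact path and stability gate differ between toolchains); the detection call is not strictly required on AArch64 targets, where Advanced SIMD is part of the baseline, but it shows the intended usage pattern:

// Minimal sketch: CMEQ Vd.8B,Vn.8B,Vm.8B via vceq_s8 (intrinsic and macro availability
// are assumptions).
#[cfg(target_arch = "aarch64")]
fn cmeq_demo() {
    if std::arch::is_aarch64_feature_detected!("neon") {
        use std::arch::aarch64::*;
        unsafe {
            let a = vdup_n_s8(5);
            let b = vdup_n_s8(6);
            let equal: [u8; 8] = core::mem::transmute(vceq_s8(a, a));
            let unequal: [u8; 8] = core::mem::transmute(vceq_s8(a, b));
            assert_eq!(equal, [0xFF; 8]);   // equal lanes -> all ones
            assert_eq!(unequal, [0x00; 8]); // unequal lanes -> all zeros
        }
    }
}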

Description

Compare bitwise Equal (vector). This instruction compares each vector element from the first source SIMD&FP register with the corresponding vector element from the second source SIMD&FP register, and if the comparison is equal sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMEQ Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare bitwise Equal (vector). This instruction compares each vector element from the first source SIMD&FP register with the corresponding vector element from the second source SIMD&FP register, and if the comparison is equal sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMEQ Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare bitwise Equal (vector). This instruction compares each vector element from the first source SIMD&FP register with the corresponding vector element from the second source SIMD&FP register, and if the comparison is equal sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMEQ Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare bitwise Equal (vector). This instruction compares each vector element from the first source SIMD&FP register with the corresponding vector element from the second source SIMD&FP register, and if the comparison is equal sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMEQ Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare bitwise Equal (vector). This instruction compares each vector element from the first source SIMD&FP register with the corresponding vector element from the second source SIMD&FP register, and if the comparison is equal sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMEQ Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare bitwise Equal (vector). This instruction compares each vector element from the first source SIMD&FP register with the corresponding vector element from the second source SIMD&FP register, and if the comparison is equal sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMEQ Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare bitwise Equal (vector). This instruction compares each vector element from the first source SIMD&FP register with the corresponding vector element from the second source SIMD&FP register, and if the comparison is equal sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMEQ Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare bitwise Equal (vector). This instruction compares each vector element from the first source SIMD&FP register with the corresponding vector element from the second source SIMD&FP register, and if the comparison is equal sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMEQ Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare bitwise Equal (vector). This instruction compares each vector element from the first source SIMD&FP register with the corresponding vector element from the second source SIMD&FP register, and if the comparison is equal sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMEQ Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare bitwise Equal (vector). This instruction compares each vector element from the first source SIMD&FP register with the corresponding vector element from the second source SIMD&FP register, and if the comparison is equal sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMEQ Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare bitwise Equal (vector). This instruction compares each vector element from the first source SIMD&FP register with the corresponding vector element from the second source SIMD&FP register, and if the comparison is equal sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMEQ Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Floating-point Compare Equal (vector). This instruction compares each floating-point value from the first source SIMD&FP register with the corresponding floating-point value from the second source SIMD&FP register, and if the comparison is equal sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMEQ Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) zero = FPZero('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element, zero, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element, zero, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element, zero, FPCR);
+        when CompareOp_LE test_passed = FPCompareGE(zero, element, FPCR);
+        when CompareOp_LT test_passed = FPCompareGT(zero, element, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
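
For the floating-point compare the mask semantics are the same, but FPCompareEQ means a NaN lane never compares equal, not even to itself. A minimal sketch, assuming std::arch::aarch64 exposes vceq_f32:

// Minimal sketch: FCMEQ Vd.2S,Vn.2S,Vm.2S via vceq_f32 (assumed available in
// std::arch::aarch64).
#[cfg(target_arch = "aarch64")]
unsafe fn fcmeq_demo() {
    use std::arch::aarch64::*;
    let a: float32x2_t = core::mem::transmute([1.0f32, f32::NAN]);
    let mask: [u32; 2] = core::mem::transmute(vceq_f32(a, a));
    assert_eq!(mask, [u32::MAX, 0]); // 1.0 == 1.0; NaN is never equal to itself
}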

Description

Floating-point Compare Equal (vector). This instruction compares each floating-point value from the first source SIMD&FP register with the corresponding floating-point value from the second source SIMD&FP register, and if the comparison is equal sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMEQ Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) zero = FPZero('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element, zero, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element, zero, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element, zero, FPCR);
+        when CompareOp_LE test_passed = FPCompareGE(zero, element, FPCR);
+        when CompareOp_LT test_passed = FPCompareGT(zero, element, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare bitwise Equal (vector). This instruction compares each vector element from the first source SIMD&FP register with the corresponding vector element from the second source SIMD&FP register, and if the comparison is equal sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMEQ Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare bitwise Equal (vector). This instruction compares each vector element from the first source SIMD&FP register with the corresponding vector element from the second source SIMD&FP register, and if the comparison is equal sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMEQ Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare bitwise Equal (vector). This instruction compares each vector element from the first source SIMD&FP register with the corresponding vector element from the second source SIMD&FP register, and if the comparison is equal sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMEQ Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64
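
The Dd,Dn,Dm encoding is the single 64-bit element form. In ACLE it surfaces both as vceq_s64 on int64x1_t values and as the scalar vceqd_s64; whether a given Rust toolchain exposes these in std::arch::aarch64 is an assumption here. A minimal sketch using the scalar spelling:

// Minimal sketch: CMEQ Dd,Dn,Dm via vceqd_s64 (availability assumed). The whole
// 64-bit result acts as a mask, all ones or all zeros.
#[cfg(target_arch = "aarch64")]
unsafe fn cmeq_scalar_demo() {
    use std::arch::aarch64::*;
    assert_eq!(vceqd_s64(42, 42), u64::MAX);
    assert_eq!(vceqd_s64(42, 43), 0);
}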

Description

Compare bitwise Equal (vector). This instruction compares each vector element from the first source SIMD&FP register with the corresponding vector element from the second source SIMD&FP register, and if the comparison is equal sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMEQ Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare bitwise Equal (vector). This instruction compares each vector element from the first source SIMD&FP register with the corresponding vector element from the second source SIMD&FP register, and if the comparison is equal sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMEQ Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare bitwise Equal (vector). This instruction compares each vector element from the first source SIMD&FP register with the corresponding vector element from the second source SIMD&FP register, and if the comparison is equal sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMEQ Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare bitwise Equal (vector). This instruction compares each vector element from the first source SIMD&FP register with the corresponding vector element from the second source SIMD&FP register, and if the comparison is equal sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMEQ Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A32/A64

Description

Compare bitwise Equal (vector). This instruction compares each vector element from the first source SIMD&FP register with the corresponding vector element from the second source SIMD&FP register, and if the comparison is equal sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMEQ Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A32/A64

Description

Floating-point Compare Equal (vector). This instruction compares each floating-point value from the first source SIMD&FP register with the corresponding floating-point value from the second source SIMD&FP register, and if the comparison is equal sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMEQ Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) zero = FPZero('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element, zero, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element, zero, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element, zero, FPCR);
+        when CompareOp_LE test_passed = FPCompareGE(zero, element, FPCR);
+        when CompareOp_LT test_passed = FPCompareGT(zero, element, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Compare Equal (vector). This instruction compares each floating-point value from the first source SIMD&FP register with the corresponding floating-point value from the second source SIMD&FP register, and if the comparison is equal sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMEQ Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) zero = FPZero('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element, zero, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element, zero, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element, zero, FPCR);
+        when CompareOp_LE test_passed = FPCompareGE(zero, element, FPCR);
+        when CompareOp_LT test_passed = FPCompareGT(zero, element, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64
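
A minimal sketch of the 2D floating-point form, assuming std::arch::aarch64 exposes vceqq_f64 and vdupq_n_f64:

// Minimal sketch: FCMEQ Vd.2D,Vn.2D,Vm.2D via vceqq_f64 (availability assumed).
#[cfg(target_arch = "aarch64")]
unsafe fn fcmeq_f64_demo() {
    use std::arch::aarch64::*;
    let a = vdupq_n_f64(2.5);
    let b = vdupq_n_f64(2.5);
    let mask: [u64; 2] = core::mem::transmute(vceqq_f64(a, b));
    assert_eq!(mask, [u64::MAX; 2]); // both lanes compare equal
}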

Description

Compare bitwise Equal (vector). This instruction compares each vector element from the first source SIMD&FP register with the corresponding vector element from the second source SIMD&FP register, and if the comparison is equal sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMEQ Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare bitwise Equal (vector). This instruction compares each vector element from the first source SIMD&FP register with the corresponding vector element from the second source SIMD&FP register, and if the comparison is equal sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMEQ Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Compare Equal (vector). This instruction compares each floating-point value from the first source SIMD&FP register with the corresponding floating-point value from the second source SIMD&FP register, and if the comparison is equal sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMEQ Sd,Sn,Sm
+

Argument Preparation

a → Sn 
+b → Sm

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) zero = FPZero('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element, zero, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element, zero, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element, zero, FPCR);
+        when CompareOp_LE test_passed = FPCompareGE(zero, element, FPCR);
+        when CompareOp_LT test_passed = FPCompareGT(zero, element, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64
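
The Sd,Sn,Sm encoding compares a single f32 and, like the vector forms, produces a mask rather than a boolean. A minimal sketch, assuming the ACLE scalar intrinsic vceqs_f32 is exposed by std::arch::aarch64:

// Minimal sketch: FCMEQ Sd,Sn,Sm via vceqs_f32 (availability assumed).
#[cfg(target_arch = "aarch64")]
unsafe fn fcmeq_scalar_demo() {
    use std::arch::aarch64::*;
    assert_eq!(vceqs_f32(1.5, 1.5), u32::MAX);
    assert_eq!(vceqs_f32(1.5, f32::NAN), 0); // NaN never compares equal
}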

Description

Floating-point Compare Equal (vector). This instruction compares each floating-point value from the first source SIMD&FP register with the corresponding floating-point value from the second source SIMD&FP register, and if the comparison is equal sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMEQ Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) zero = FPZero('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element, zero, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element, zero, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element, zero, FPCR);
+        when CompareOp_LE test_passed = FPCompareGE(zero, element, FPCR);
+        when CompareOp_LT test_passed = FPCompareGT(zero, element, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare bitwise Equal to zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the value is equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMEQ Vd.8B,Vn.8B,#0
+

Argument Preparation

a → Vn.8B 

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64
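
The #0 forms take a single source register, which is why the argument preparation above lists only a. In ACLE these are the vceqz_* intrinsics; where a toolchain does not expose them, vceq_s8(a, vdup_n_s8(0)) expresses the same comparison. A minimal sketch under that assumption:

// Minimal sketch: CMEQ Vd.8B,Vn.8B,#0 via vceqz_s8 (availability assumed).
#[cfg(target_arch = "aarch64")]
unsafe fn cmeqz_demo() {
    use std::arch::aarch64::*;
    let a: int8x8_t = core::mem::transmute([0i8, 3, 0, -1, 0, 0, 9, 0]);
    let mask: [u8; 8] = core::mem::transmute(vceqz_s8(a));
    assert_eq!(mask, [0xFF, 0, 0xFF, 0, 0xFF, 0xFF, 0, 0xFF]);
}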

Description

Compare bitwise Equal to zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the value is equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMEQ Vd.16B,Vn.16B,#0
+

Argument Preparation

a → Vn.16B 

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare bitwise Equal to zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the value is equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMEQ Vd.4H,Vn.4H,#0
+

Argument Preparation

a → Vn.4H 

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare bitwise Equal to zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the value is equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMEQ Vd.8H,Vn.8H,#0
+

Argument Preparation

a → Vn.8H 

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare bitwise Equal to zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the value is equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMEQ Vd.2S,Vn.2S,#0
+

Argument Preparation

a → Vn.2S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare bitwise Equal to zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the value is equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMEQ Vd.4S,Vn.4S,#0
+

Argument Preparation

a → Vn.4S 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64
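
The 128-bit .4S form behaves the same way, lane by lane. A hedged sketch, assuming this entry maps to the vceqzq_s32 intrinsic (assumed name):

#[cfg(target_arch = "aarch64")]
fn demo_vceqzq_s32() {
    use core::arch::aarch64::*;
    unsafe {
        let a: int32x4_t = core::mem::transmute([0i32, -7, 0, 42]);
        // Zero lanes become 0xFFFF_FFFF, non-zero lanes become 0.
        let mask: uint32x4_t = vceqzq_s32(a);
        let lanes: [u32; 4] = core::mem::transmute(mask);
        assert_eq!(lanes, [u32::MAX, 0, u32::MAX, 0]);
    }
}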

Description

Compare bitwise Equal to zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the value is equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMEQ Vd.8B,Vn.8B,#0
+

Argument Preparation

a → Vn.8B 

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare bitwise Equal to zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the value is equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMEQ Vd.16B,Vn.16B,#0
+

Argument Preparation

a → Vn.16B 

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare bitwise Equal to zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the value is equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMEQ Vd.4H,Vn.4H,#0
+

Argument Preparation

a → Vn.4H 

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare bitwise Equal to zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the value is equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMEQ Vd.8H,Vn.8H,#0
+

Argument Preparation

a → Vn.8H 

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare bitwise Equal to zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the value is equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMEQ Vd.2S,Vn.2S,#0
+

Argument Preparation

a → Vn.2S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare bitwise Equal to zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the value is equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMEQ Vd.4S,Vn.4S,#0
+

Argument Preparation

a → Vn.4S 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Compare Equal to zero (vector). This instruction reads each floating-point value in the source SIMD&FP register and if the value is equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMEQ Vd.2S,Vn.2S,#0
+

Argument Preparation

a → Vn.2S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) zero = FPZero('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element, zero, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element, zero, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element, zero, FPCR);
+        when CompareOp_LE test_passed = FPCompareGE(zero, element, FPCR);
+        when CompareOp_LT test_passed = FPCompareGT(zero, element, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64
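
A minimal Rust sketch of the floating-point zero comparison, assuming this entry corresponds to the vceqz_f32 intrinsic (an assumption; the intrinsic name is not shown here):

#[cfg(target_arch = "aarch64")]
fn demo_vceqz_f32() {
    use core::arch::aarch64::*;
    unsafe {
        // Lane 0 equals zero, lane 1 does not.
        let a: float32x2_t = core::mem::transmute([0.0f32, 1.5]);
        let mask: uint32x2_t = vceqz_f32(a);
        let lanes: [u32; 2] = core::mem::transmute(mask);
        assert_eq!(lanes, [u32::MAX, 0]);
    }
}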

Description

Floating-point Compare Equal to zero (vector). This instruction reads each floating-point value in the source SIMD&FP register and if the value is equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMEQ Vd.4S,Vn.4S,#0
+

Argument Preparation

a → Vn.4S 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) zero = FPZero('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element, zero, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element, zero, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element, zero, FPCR);
+        when CompareOp_LE test_passed = FPCompareGE(zero, element, FPCR);
+        when CompareOp_LT test_passed = FPCompareGT(zero, element, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare bitwise Equal to zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the value is equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMEQ Vd.8B,Vn.8B,#0
+

Argument Preparation

a → Vn.8B 

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare bitwise Equal to zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the value is equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMEQ Vd.16B,Vn.16B,#0
+

Argument Preparation

a → Vn.16B 

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare bitwise Equal to zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the value is equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMEQ Dd,Dn,#0
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64
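
For the scalar doubleword form, a hedged sketch assuming the vceqzd_s64 intrinsic (assumed mapping):

#[cfg(target_arch = "aarch64")]
fn demo_vceqzd_s64() {
    use core::arch::aarch64::*;
    unsafe {
        // Scalar form: the whole 64-bit result is all-ones when the input is zero.
        assert_eq!(vceqzd_s64(0), u64::MAX);
        assert_eq!(vceqzd_s64(-1), 0);
    }
}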

Description

Compare bitwise Equal to zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the value is equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMEQ Vd.2D,Vn.2D,#0
+

Argument Preparation

a → Vn.2D 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64
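
The .2D form follows the same pattern per 64-bit lane. A sketch, assuming this entry maps to vceqzq_s64 (assumed name):

#[cfg(target_arch = "aarch64")]
fn demo_vceqzq_s64() {
    use core::arch::aarch64::*;
    unsafe {
        let a: int64x2_t = core::mem::transmute([0i64, -9]);
        let mask: uint64x2_t = vceqzq_s64(a);
        let lanes: [u64; 2] = core::mem::transmute(mask);
        assert_eq!(lanes, [u64::MAX, 0]);
    }
}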

Description

Compare bitwise Equal to zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the value is equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMEQ Dd,Dn,#0
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare bitwise Equal to zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the value is equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMEQ Vd.2D,Vn.2D,#0
+

Argument Preparation

a → Vn.2D 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare bitwise Equal to zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the value is equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMEQ Dd,Dn,#0
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A32/A64

Description

Compare bitwise Equal to zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the value is equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMEQ Vd.2D,Vn.2D,#0
+

Argument Preparation

a → Vn.2D 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A32/A64

Description

Floating-point Compare Equal to zero (vector). This instruction reads each floating-point value in the source SIMD&FP register and if the value is equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMEQ Dd,Dn,#0
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) zero = FPZero('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element, zero, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element, zero, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element, zero, FPCR);
+        when CompareOp_LE test_passed = FPCompareGE(zero, element, FPCR);
+        when CompareOp_LT test_passed = FPCompareGT(zero, element, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Compare Equal to zero (vector). This instruction reads each floating-point value in the source SIMD&FP register and if the value is equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMEQ Vd.2D,Vn.2D,#0
+

Argument Preparation

a → Vn.2D 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) zero = FPZero('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element, zero, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element, zero, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element, zero, FPCR);
+        when CompareOp_LE test_passed = FPCompareGE(zero, element, FPCR);
+        when CompareOp_LT test_passed = FPCompareGT(zero, element, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare bitwise Equal to zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the value is equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMEQ Dd,Dn,#0
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare bitwise Equal to zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the value is equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMEQ Dd,Dn,#0
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Compare Equal to zero (vector). This instruction reads each floating-point value in the source SIMD&FP register and if the value is equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMEQ Sd,Sn,#0
+

Argument Preparation

a → Sn 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) zero = FPZero('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element, zero, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element, zero, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element, zero, FPCR);
+        when CompareOp_LE test_passed = FPCompareGE(zero, element, FPCR);
+        when CompareOp_LT test_passed = FPCompareGT(zero, element, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64
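
A sketch of the scalar single-precision form, assuming it corresponds to the vceqzs_f32 intrinsic (assumed mapping):

#[cfg(target_arch = "aarch64")]
fn demo_vceqzs_f32() {
    use core::arch::aarch64::*;
    unsafe {
        // All-ones u32 when the input compares equal to zero, otherwise 0.
        assert_eq!(vceqzs_f32(0.0), u32::MAX);
        assert_eq!(vceqzs_f32(3.25), 0);
    }
}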

Description

Floating-point Compare Equal to zero (vector). This instruction reads each floating-point value in the source SIMD&FP register and if the value is equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMEQ Dd,Dn,#0
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) zero = FPZero('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element, zero, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element, zero, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element, zero, FPCR);
+        when CompareOp_LE test_passed = FPCompareGE(zero, element, FPCR);
+        when CompareOp_LT test_passed = FPCompareGT(zero, element, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare signed Greater than or Equal (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first signed integer value is greater than or equal to the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGE Vd.8B,Vm.8B,Vn.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
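
The Vm-before-Vn operand order in the listed instruction is how an a <= b comparison is expressed with CMGE (the operands are swapped and the same encoding reused), so this entry most likely corresponds to the vcle_s8 intrinsic. A hedged sketch under that assumption:

#[cfg(target_arch = "aarch64")]
fn demo_vcle_s8() {
    use core::arch::aarch64::*;
    unsafe {
        let a: int8x8_t = core::mem::transmute([-3i8, 0, 5, 1, 2, 3, 4, 5]);
        let b: int8x8_t = core::mem::transmute([0i8, 0, 4, 1, 2, 3, 4, 5]);
        // Lane-wise signed a <= b; lowered as CMGE with the operands swapped.
        let mask: uint8x8_t = vcle_s8(a, b);
        let lanes: [u8; 8] = core::mem::transmute(mask);
        assert_eq!(lanes, [0xFF, 0xFF, 0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF]);
    }
}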

Description

Compare signed Greater than or Equal (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first signed integer value is greater than or equal to the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGE Vd.16B,Vm.16B,Vn.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare signed Greater than or Equal (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first signed integer value is greater than or equal to the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGE Vd.4H,Vm.4H,Vn.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare signed Greater than or Equal (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first signed integer value is greater than or equal to the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGE Vd.8H,Vm.8H,Vn.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare signed Greater than or Equal (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first signed integer value is greater than or equal to the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGE Vd.2S,Vm.2S,Vn.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare signed Greater than or Equal (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first signed integer value is greater than or equal to the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGE Vd.4S,Vm.4S,Vn.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare unsigned Higher or Same (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first unsigned integer value is greater than or equal to the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMHS Vd.8B,Vm.8B,Vn.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
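
As with the signed variant, the Vm-first operand order suggests an unsigned a <= b comparison lowered to CMHS with swapped operands, so this likely corresponds to the vcle_u8 intrinsic. A hedged sketch under that assumption:

#[cfg(target_arch = "aarch64")]
fn demo_vcle_u8() {
    use core::arch::aarch64::*;
    unsafe {
        let a: uint8x8_t = core::mem::transmute([1u8, 200, 7, 7, 0, 9, 10, 255]);
        let b: uint8x8_t = core::mem::transmute([2u8, 100, 7, 6, 0, 9, 11, 255]);
        // Lane-wise unsigned a <= b; lowered as CMHS with the operands swapped.
        let mask: uint8x8_t = vcle_u8(a, b);
        let lanes: [u8; 8] = core::mem::transmute(mask);
        assert_eq!(lanes, [0xFF, 0, 0xFF, 0, 0xFF, 0xFF, 0xFF, 0xFF]);
    }
}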

Description

Compare unsigned Higher or Same (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first unsigned integer value is greater than or equal to the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMHS Vd.16B,Vm.16B,Vn.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare unsigned Higher or Same (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first unsigned integer value is greater than or equal to the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMHS Vd.4H,Vm.4H,Vn.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare unsigned Higher or Same (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first unsigned integer value is greater than or equal to the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMHS Vd.8H,Vm.8H,Vn.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare unsigned Higher or Same (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first unsigned integer value is greater than or equal to the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMHS Vd.2S,Vm.2S,Vn.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare unsigned Higher or Same (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first unsigned integer value is greater than or equal to the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMHS Vd.4S,Vm.4S,Vn.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Floating-point Compare Greater than or Equal (vector). This instruction compares each floating-point value in the first source SIMD&FP register with the corresponding floating-point value in the second source SIMD&FP register and if the first value is greater than or equal to the second value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMGE Vd.2S,Vm.2S,Vn.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element1, element2, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element1, element2, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element1, element2, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
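
The swapped Vm/Vn order again points at an a <= b comparison, so this entry likely maps to the vcle_f32 intrinsic. A hedged sketch under that assumption:

#[cfg(target_arch = "aarch64")]
fn demo_vcle_f32() {
    use core::arch::aarch64::*;
    unsafe {
        let a: float32x2_t = core::mem::transmute([1.0f32, 2.5]);
        let b: float32x2_t = core::mem::transmute([1.0f32, 2.0]);
        // Lane-wise a <= b; lowered as FCMGE with the operands swapped.
        let mask: uint32x2_t = vcle_f32(a, b);
        let lanes: [u32; 2] = core::mem::transmute(mask);
        assert_eq!(lanes, [u32::MAX, 0]);
    }
}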

Description

Floating-point Compare Greater than or Equal (vector). This instruction compares each floating-point value in the first source SIMD&FP register with the corresponding floating-point value in the second source SIMD&FP register and if the first value is greater than or equal to the second value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMGE Vd.4S,Vm.4S,Vn.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element1, element2, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element1, element2, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element1, element2, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare signed Greater than or Equal (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first signed integer value is greater than or equal to the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGE Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64
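
With a mapped to Dn and b to Dm, this scalar register form plausibly corresponds to the vcged_s64 intrinsic (a >= b). A hedged sketch under that assumption:

#[cfg(target_arch = "aarch64")]
fn demo_vcged_s64() {
    use core::arch::aarch64::*;
    unsafe {
        // Scalar signed a >= b: the whole 64-bit result is all-ones or all-zeros.
        assert_eq!(vcged_s64(5, -1), u64::MAX);
        assert_eq!(vcged_s64(-2, 7), 0);
    }
}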

Description

Compare signed Greater than or Equal (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first signed integer value is greater than or equal to the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGE Vd.2D,Vm.2D,Vn.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare unsigned Higher or Same (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first unsigned integer value is greater than or equal to the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMHS Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare unsigned Higher or Same (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first unsigned integer value is greater than or equal to the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMHS Vd.2D,Vm.2D,Vn.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Compare Greater than or Equal (vector). This instruction compares each floating-point value in the first source SIMD&FP register with the corresponding floating-point value in the second source SIMD&FP register and if the first value is greater than or equal to the second value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMGE Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element1, element2, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element1, element2, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element1, element2, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Compare Greater than or Equal (vector). This instruction compares each floating-point value in the first source SIMD&FP register with the corresponding floating-point value in the second source SIMD&FP register and if the first value is greater than or equal to the second value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMGE Vd.2D,Vm.2D,Vn.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element1, element2, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element1, element2, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element1, element2, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare signed Greater than or Equal (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first signed integer value is greater than or equal to the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGE Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare unsigned Higher or Same (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first unsigned integer value is greater than or equal to the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMHS Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Compare Greater than or Equal (vector). This instruction compares each floating-point value in the first source SIMD&FP register with the corresponding floating-point value in the second source SIMD&FP register and if the first value is greater than or equal to the second value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMGE Sd,Sn,Sm
+

Argument Preparation

a → Sn 
+b → Sm

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element1, element2, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element1, element2, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element1, element2, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64
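
With a mapped to Sn and b to Sm, this scalar single-precision form plausibly corresponds to the vcges_f32 intrinsic. A hedged sketch under that assumption:

#[cfg(target_arch = "aarch64")]
fn demo_vcges_f32() {
    use core::arch::aarch64::*;
    unsafe {
        // Scalar a >= b: all-ones u32 when true, 0 otherwise.
        assert_eq!(vcges_f32(2.0, 1.0), u32::MAX);
        assert_eq!(vcges_f32(0.5, 1.0), 0);
    }
}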

Description

Floating-point Compare Greater than or Equal (vector). This instruction compares each floating-point value in the first source SIMD&FP register with the corresponding floating-point value in the second source SIMD&FP register and if the first value is greater than or equal to the second value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMGE Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element1, element2, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element1, element2, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element1, element2, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare signed Greater than or Equal to zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the signed integer value is greater than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGE Vd.8B,Vn.8B,#0
+

Argument Preparation

a → Vn.8B 

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64
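From Rust this compare-against-zero form is reached through the core::arch::aarch64 intrinsics; a minimal sketch, assuming vcgez_s8 is exposed with its usual ACLE signature:

#[cfg(target_arch = "aarch64")]
fn cmgez_demo() {
    // assumption: vcgez_s8 and vdup_n_s8 are available in core::arch::aarch64
    use core::arch::aarch64::*;
    unsafe {
        let a = vdup_n_s8(-5);                  // all eight lanes hold -5
        let mask = vcgez_s8(a);                 // CMGE Vd.8B,Vn.8B,#0
        let lanes: [u8; 8] = core::mem::transmute(mask);
        assert!(lanes.iter().all(|&l| l == 0)); // -5 >= 0 is false, so every lane is 0x00
    }
}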

Description

Compare signed Greater than or Equal to zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the signed integer value is greater than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGE Vd.16B,Vn.16B,#0
+

Argument Preparation

a → Vn.16B 

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare signed Greater than or Equal to zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the signed integer value is greater than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGE Vd.4H,Vn.4H,#0
+

Argument Preparation

a → Vn.4H 

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare signed Greater than or Equal to zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the signed integer value is greater than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGE Vd.8H,Vn.8H,#0
+

Argument Preparation

a → Vn.8H 

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare signed Greater than or Equal to zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the signed integer value is greater than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGE Vd.2S,Vn.2S,#0
+

Argument Preparation

a → Vn.2S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare signed Greater than or Equal to zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the signed integer value is greater than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGE Vd.4S,Vn.4S,#0
+

Argument Preparation

a → Vn.4S 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare signed Greater than or Equal to zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the signed integer value is greater than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGE Dd,Dn,#0
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare signed Greater than or Equal to zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the signed integer value is greater than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGE Vd.2D,Vn.2D,#0
+

Argument Preparation

a → Vn.2D 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Compare Greater than or Equal to zero (vector). This instruction reads each floating-point value in the source SIMD&FP register and if the value is greater than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMGE Vd.2S,Vn.2S,#0
+

Argument Preparation

a → Vn.2S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) zero = FPZero('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element, zero, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element, zero, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element, zero, FPCR);
+        when CompareOp_LE test_passed = FPCompareGE(zero, element, FPCR);
+        when CompareOp_LT test_passed = FPCompareGT(zero, element, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64
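A minimal Rust sketch of the floating-point compare-against-zero form, assuming vcgezq_f32 is exposed by core::arch::aarch64:

#[cfg(target_arch = "aarch64")]
fn fcmgez_demo() {
    // assumption: vcgezq_f32 and vdupq_n_f32 are available in core::arch::aarch64
    use core::arch::aarch64::*;
    unsafe {
        let a = vdupq_n_f32(0.0);               // +0.0 in every lane
        let mask = vcgezq_f32(a);               // FCMGE Vd.4S,Vn.4S,#0
        let lanes: [u32; 4] = core::mem::transmute(mask);
        assert!(lanes.iter().all(|&l| l == u32::MAX)); // 0.0 >= 0.0, so all bits set
    }
}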

Description

Floating-point Compare Greater than or Equal to zero (vector). This instruction reads each floating-point value in the source SIMD&FP register and if the value is greater than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMGE Vd.4S,Vn.4S,#0
+

Argument Preparation

a → Vn.4S 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) zero = FPZero('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element, zero, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element, zero, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element, zero, FPCR);
+        when CompareOp_LE test_passed = FPCompareGE(zero, element, FPCR);
+        when CompareOp_LT test_passed = FPCompareGT(zero, element, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Compare Greater than or Equal to zero (vector). This instruction reads each floating-point value in the source SIMD&FP register and if the value is greater than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMGE Dd,Dn,#0
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) zero = FPZero('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element, zero, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element, zero, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element, zero, FPCR);
+        when CompareOp_LE test_passed = FPCompareGE(zero, element, FPCR);
+        when CompareOp_LT test_passed = FPCompareGT(zero, element, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Compare Greater than or Equal to zero (vector). This instruction reads each floating-point value in the source SIMD&FP register and if the value is greater than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMGE Vd.2D,Vn.2D,#0
+

Argument Preparation

a → Vn.2D 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) zero = FPZero('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element, zero, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element, zero, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element, zero, FPCR);
+        when CompareOp_LE test_passed = FPCompareGE(zero, element, FPCR);
+        when CompareOp_LT test_passed = FPCompareGT(zero, element, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare signed Greater than or Equal to zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the signed integer value is greater than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGE Dd,Dn,#0
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Compare Greater than or Equal to zero (vector). This instruction reads each floating-point value in the source SIMD&FP register and if the value is greater than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMGE Sd,Sn,#0
+

Argument Preparation

a → Sn 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) zero = FPZero('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element, zero, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element, zero, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element, zero, FPCR);
+        when CompareOp_LE test_passed = FPCompareGE(zero, element, FPCR);
+        when CompareOp_LT test_passed = FPCompareGT(zero, element, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Compare Greater than or Equal to zero (vector). This instruction reads each floating-point value in the source SIMD&FP register and if the value is greater than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMGE Dd,Dn,#0
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) zero = FPZero('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element, zero, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element, zero, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element, zero, FPCR);
+        when CompareOp_LE test_passed = FPCompareGE(zero, element, FPCR);
+        when CompareOp_LT test_passed = FPCompareGT(zero, element, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare signed Greater than or Equal (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first signed integer value is greater than or equal to the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGE Vd.8B,Vm.8B,Vn.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
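A minimal Rust sketch of the register form of this signed compare, assuming the usual mapping of vcge_s8 to CMGE; lane values are arbitrary:

#[cfg(target_arch = "aarch64")]
fn cmge_demo() {
    use core::arch::aarch64::*;
    unsafe {
        let a = vdup_n_s8(3);
        let b = vdup_n_s8(-1);
        let mask = vcge_s8(a, b);               // CMGE Vd.8B,Vn.8B,Vm.8B
        let lanes: [u8; 8] = core::mem::transmute(mask);
        assert!(lanes.iter().all(|&l| l == 0xFF)); // 3 >= -1 in every lane
    }
}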

Description

Compare signed Greater than or Equal (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first signed integer value is greater than or equal to the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGE Vd.16B,Vm.16B,Vn.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare signed Greater than or Equal (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first signed integer value is greater than or equal to the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGE Vd.4H,Vm.4H,Vn.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare signed Greater than or Equal (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first signed integer value is greater than or equal to the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGE Vd.8H,Vm.8H,Vn.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare signed Greater than or Equal (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first signed integer value is greater than or equal to the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGE Vd.2S,Vm.2S,Vn.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare signed Greater than or Equal (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first signed integer value is greater than or equal to the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGE Vd.4S,Vm.4S,Vn.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare unsigned Higher or Same (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first unsigned integer value is greater than or equal to the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMHS Vd.8B,Vm.8B,Vn.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
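A minimal Rust sketch of the unsigned register compare, also showing why the all-ones/all-zeros mask is convenient (vbsl_u8 performs a bitwise select with it); lane values are arbitrary:

#[cfg(target_arch = "aarch64")]
fn cmhs_demo() {
    use core::arch::aarch64::*;
    unsafe {
        let a = vdup_n_u8(200);
        let b = vdup_n_u8(100);
        let mask = vcge_u8(a, b);               // CMHS Vd.8B,Vn.8B,Vm.8B
        let max = vbsl_u8(mask, a, b);          // branchless per-lane max via the mask
        let lanes: [u8; 8] = core::mem::transmute(max);
        assert!(lanes.iter().all(|&l| l == 200));
    }
}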

Description

Compare unsigned Higher or Same (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first unsigned integer value is greater than or equal to the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMHS Vd.16B,Vm.16B,Vn.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare unsigned Higher or Same (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first unsigned integer value is greater than or equal to the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMHS Vd.4H,Vm.4H,Vn.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare unsigned Higher or Same (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first unsigned integer value is greater than or equal to the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMHS Vd.8H,Vm.8H,Vn.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare unsigned Higher or Same (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first unsigned integer value is greater than or equal to the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMHS Vd.2S,Vm.2S,Vn.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare unsigned Higher or Same (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first unsigned integer value is greater than or equal to the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMHS Vd.4S,Vm.4S,Vn.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Floating-point Compare Greater than or Equal (vector). This instruction reads each floating-point value in the first source SIMD&FP register and if it is greater than or equal to the corresponding floating-point value in the second source SIMD&FP register sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMGE Vd.2S,Vm.2S,Vn.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    case cmp of
+        when CompareOp_EQ test_passed = FPCompareEQ(element1, element2, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element1, element2, FPCR);
+        when CompareOp_GT test_passed = FPCompareGT(element1, element2, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Floating-point Compare Greater than or Equal (vector). This instruction reads each floating-point value in the first source SIMD&FP register and if it is greater than or equal to the corresponding floating-point value in the second source SIMD&FP register sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMGE Vd.4S,Vm.4S,Vn.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    case cmp of
+        when CompareOp_EQ test_passed = FPCompareEQ(element1, element2, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element1, element2, FPCR);
+        when CompareOp_GT test_passed = FPCompareGT(element1, element2, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare signed Greater than or Equal (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first signed integer value is greater than or equal to the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGE Dd,Dm,Dn
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare signed Greater than or Equal (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first signed integer value is greater than or equal to the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGE Vd.2D,Vm.2D,Vn.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare unsigned Higher or Same (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first unsigned integer value is greater than or equal to the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMHS Dd,Dm,Dn
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare unsigned Higher or Same (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first unsigned integer value is greater than or equal to the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMHS Vd.2D,Vm.2D,Vn.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Compare Greater than or Equal (vector). This instruction reads each floating-point value in the first source SIMD&FP register and if it is greater than or equal to the corresponding floating-point value in the second source SIMD&FP register sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMGE Dd,Dm,Dn
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    case cmp of
+        when CompareOp_EQ test_passed = FPCompareEQ(element1, element2, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element1, element2, FPCR);
+        when CompareOp_GT test_passed = FPCompareGT(element1, element2, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Compare Greater than or Equal (vector). This instruction reads each floating-point value in the first source SIMD&FP register and if it is greater than or equal to the corresponding floating-point value in the second source SIMD&FP register sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMGE Vd.2D,Vm.2D,Vn.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    case cmp of
+        when CompareOp_EQ test_passed = FPCompareEQ(element1, element2, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element1, element2, FPCR);
+        when CompareOp_GT test_passed = FPCompareGT(element1, element2, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare signed Greater than or Equal (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first signed integer value is greater than or equal to the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGE Dd,Dm,Dn
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare unsigned Higher or Same (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first unsigned integer value is greater than or equal to the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMHS Dd,Dm,Dn
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Compare Greater than or Equal (vector). This instruction reads each floating-point value in the first source SIMD&FP register and if it is greater than or equal to the corresponding floating-point value in the second source SIMD&FP register sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMGE Sd,Sm,Sn
+

Argument Preparation

a → Sn 
+b → Sm

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    case cmp of
+        when CompareOp_EQ test_passed = FPCompareEQ(element1, element2, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element1, element2, FPCR);
+        when CompareOp_GT test_passed = FPCompareGT(element1, element2, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Compare Greater than or Equal (vector). This instruction reads each floating-point value in the first source SIMD&FP register and if it is greater than or equal to the corresponding floating-point value in the second source SIMD&FP register sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMGE Dd,Dm,Dn
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    case cmp of
+        when CompareOp_EQ test_passed = FPCompareEQ(element1, element2, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element1, element2, FPCR);
+        when CompareOp_GT test_passed = FPCompareGT(element1, element2, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare signed Less than or Equal to zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the signed integer value is less than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMLE Vd.8B,Vn.8B,#0
+

Argument Preparation

a → Vn.8B 

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64
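A minimal Rust sketch of the signed compare-less-than-or-equal-to-zero form, assuming vclez_s8 is exposed by core::arch::aarch64:

#[cfg(target_arch = "aarch64")]
fn cmlez_demo() {
    // assumption: vclez_s8 and vdup_n_s8 are available in core::arch::aarch64
    use core::arch::aarch64::*;
    unsafe {
        let a = vdup_n_s8(-7);
        let mask = vclez_s8(a);                 // CMLE Vd.8B,Vn.8B,#0
        let lanes: [u8; 8] = core::mem::transmute(mask);
        assert!(lanes.iter().all(|&l| l == 0xFF)); // -7 <= 0, so every lane is all ones
    }
}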

Description

Compare signed Less than or Equal to zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the signed integer value is less than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMLE Vd.16B,Vn.16B,#0
+

Argument Preparation

a → Vn.16B 

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare signed Less than or Equal to zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the signed integer value is less than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMLE Vd.4H,Vn.4H,#0
+

Argument Preparation

a → Vn.4H 

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare signed Less than or Equal to zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the signed integer value is less than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMLE Vd.8H,Vn.8H,#0
+

Argument Preparation

a → Vn.8H 

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare signed Less than or Equal to zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the signed integer value is less than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMLE Vd.2S,Vn.2S,#0
+

Argument Preparation

a → Vn.2S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare signed Less than or Equal to zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the signed integer value is less than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMLE Vd.4S,Vn.4S,#0
+

Argument Preparation

a → Vn.4S 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare signed Less than or Equal to zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the signed integer value is less than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMLE Dd,Dn,#0
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare signed Less than or Equal to zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the signed integer value is less than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMLE Vd.2D,Vn.2D,#0
+

Argument Preparation

a → Vn.2D 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare signed Less than or Equal to zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the signed integer value is less than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMLE Vd.2S,Vn.2S,#0
+

Argument Preparation

a → Vn.2S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Compare Less than or Equal to zero (vector). This instruction reads each floating-point value in the source SIMD&FP register and if the value is less than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMLE Vd.4S,Vn.4S,#0
+

Argument Preparation

a → Vn.4S 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) zero = FPZero('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element, zero, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element, zero, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element, zero, FPCR);
+        when CompareOp_LE test_passed = FPCompareGE(zero, element, FPCR);
+        when CompareOp_LT test_passed = FPCompareGT(zero, element, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64
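
A minimal Rust sketch of the floating-point variant, assuming four 32-bit lanes (the 4S arrangement) and an illustrative function name; a NaN lane fails the comparison and therefore yields an all-zeros mask, matching the FPCompareGE behaviour in the pseudocode:

// Reference model of FCMLE #0 on four f32 lanes.
fn fcmle_zero_f32x4(a: [f32; 4]) -> [u32; 4] {
    let mut result = [0u32; 4];
    for (dst, &lane) in result.iter_mut().zip(a.iter()) {
        // `<=` is false for NaN, so NaN lanes produce 0, as in the pseudocode.
        *dst = if lane <= 0.0 { u32::MAX } else { 0 };
    }
    result
}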

Description

Floating-point Compare Less than or Equal to zero (vector). This instruction reads each floating-point value in the source SIMD&FP register and if the value is less than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMLE Dd,Dn,#0
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) zero = FPZero('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element, zero, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element, zero, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element, zero, FPCR);
+        when CompareOp_LE test_passed = FPCompareGE(zero, element, FPCR);
+        when CompareOp_LT test_passed = FPCompareGT(zero, element, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Compare Less than or Equal to zero (vector). This instruction reads each floating-point value in the source SIMD&FP register and if the value is less than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMLE Vd.2D,Vn.2D,#0
+

Argument Preparation

a → Vn.2D 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) zero = FPZero('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element, zero, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element, zero, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element, zero, FPCR);
+        when CompareOp_LE test_passed = FPCompareGE(zero, element, FPCR);
+        when CompareOp_LT test_passed = FPCompareGT(zero, element, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare signed Less than or Equal to zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the signed integer value is less than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMLE Dd,Dn,#0
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Compare Less than or Equal to zero (vector). This instruction reads each floating-point value in the source SIMD&FP register and if the value is less than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMLE Sd,Sn,#0
+

Argument Preparation

a → Sn 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) zero = FPZero('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element, zero, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element, zero, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element, zero, FPCR);
+        when CompareOp_LE test_passed = FPCompareGE(zero, element, FPCR);
+        when CompareOp_LT test_passed = FPCompareGT(zero, element, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Compare Less than or Equal to zero (vector). This instruction reads each floating-point value in the source SIMD&FP register and if the value is less than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMLE Dd,Dn,#0
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) zero = FPZero('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element, zero, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element, zero, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element, zero, FPCR);
+        when CompareOp_LE test_passed = FPCompareGE(zero, element, FPCR);
+        when CompareOp_LT test_passed = FPCompareGT(zero, element, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare signed Greater than (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first signed integer value is greater than the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGT Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
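
A minimal Rust sketch of the register-to-register signed compare, assuming eight 8-bit lanes (the 8B arrangement) and an illustrative function name:

// Reference model of CMGT (register): lane-wise signed a > b.
fn cmgt_i8x8(a: [i8; 8], b: [i8; 8]) -> [u8; 8] {
    let mut result = [0u8; 8];
    for i in 0..8 {
        result[i] = if a[i] > b[i] { u8::MAX } else { 0 };
    }
    result
}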

Description

Compare signed Greater than (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first signed integer value is greater than the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGT Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare signed Greater than (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first signed integer value is greater than the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGT Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare signed Greater than (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first signed integer value is greater than the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGT Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare signed Greater than (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first signed integer value is greater than the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGT Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare signed Greater than (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first signed integer value is greater than the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGT Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare unsigned Higher (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first unsigned integer value is greater than the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMHI Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
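
A minimal Rust sketch of the unsigned compare, assuming eight 8-bit lanes and an illustrative function name; the only difference from the signed form is the unsigned lane type:

// Reference model of CMHI: lane-wise unsigned a > b.
fn cmhi_u8x8(a: [u8; 8], b: [u8; 8]) -> [u8; 8] {
    let mut result = [0u8; 8];
    for i in 0..8 {
        result[i] = if a[i] > b[i] { u8::MAX } else { 0 };
    }
    result
}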

Description

Compare unsigned Higher (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first unsigned integer value is greater than the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMHI Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare unsigned Higher (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first unsigned integer value is greater than the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMHI Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare unsigned Higher (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first unsigned integer value is greater than the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMHI Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare unsigned Higher (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first unsigned integer value is greater than the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMHI Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare unsigned Higher (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first unsigned integer value is greater than the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMHI Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Floating-point Compare Greater than (vector). This instruction reads each floating-point value in the first source SIMD&FP register and if the value is greater than the corresponding floating-point value in the second source SIMD&FP register sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMGT Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element1, element2, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element1, element2, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element1, element2, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
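
A minimal Rust sketch of the floating-point register compare, assuming two 32-bit lanes (the 2S arrangement) and an illustrative function name; a NaN in either lane fails the comparison and yields an all-zeros mask:

// Reference model of FCMGT (register): lane-wise a > b on f32.
fn fcmgt_f32x2(a: [f32; 2], b: [f32; 2]) -> [u32; 2] {
    let mut result = [0u32; 2];
    for i in 0..2 {
        result[i] = if a[i] > b[i] { u32::MAX } else { 0 };
    }
    result
}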

Description

Floating-point Compare Greater than (vector). This instruction reads each floating-point value in the first source SIMD&FP register and if the value is greater than the corresponding floating-point value in the second source SIMD&FP register sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMGT Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element1, element2, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element1, element2, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element1, element2, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare signed Greater than (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first signed integer value is greater than the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGT Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare signed Greater than (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first signed integer value is greater than the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGT Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare unsigned Higher (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first unsigned integer value is greater than the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMHI Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare unsigned Higher (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first unsigned integer value is greater than the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMHI Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Compare Greater than (vector). This instruction reads each floating-point value in the first source SIMD&FP register and if the value is greater than the corresponding floating-point value in the second source SIMD&FP register sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMGT Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element1, element2, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element1, element2, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element1, element2, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Compare Greater than (vector). This instruction reads each floating-point value in the first source SIMD&FP register and if the value is greater than the corresponding floating-point value in the second source SIMD&FP register sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMGT Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element1, element2, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element1, element2, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element1, element2, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare signed Greater than (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first signed integer value is greater than the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGT Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare unsigned Higher (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first unsigned integer value is greater than the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMHI Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Compare Greater than (vector). This instruction reads each floating-point value in the first source SIMD&FP register and if the value is greater than the corresponding floating-point value in the second source SIMD&FP register sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMGT Sd,Sn,Sm
+

Argument Preparation

a → Sn 
+b → Sm

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element1, element2, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element1, element2, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element1, element2, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Compare Greater than (vector). This instruction reads each floating-point value in the first source SIMD&FP register and if the value is greater than the corresponding floating-point value in the second source SIMD&FP register sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMGT Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element1, element2, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element1, element2, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element1, element2, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare signed Greater than zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the signed integer value is greater than zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGT Vd.8B,Vn.8B,#0
+

Argument Preparation

a → Vn.8B 

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64
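
A minimal Rust sketch of the signed compare against zero, assuming eight 8-bit lanes and an illustrative function name:

// Reference model of CMGT #0: lane-wise signed a > 0.
fn cmgt_zero_i8x8(a: [i8; 8]) -> [u8; 8] {
    let mut result = [0u8; 8];
    for i in 0..8 {
        result[i] = if a[i] > 0 { u8::MAX } else { 0 };
    }
    result
}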

Description

Compare signed Greater than zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the signed integer value is greater than zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGT Vd.16B,Vn.16B,#0
+

Argument Preparation

a → Vn.16B 

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare signed Greater than zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the signed integer value is greater than zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGT Vd.4H,Vn.4H,#0
+

Argument Preparation

a → Vn.4H 

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare signed Greater than zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the signed integer value is greater than zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGT Vd.8H,Vn.8H,#0
+

Argument Preparation

a → Vn.8H 

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare signed Greater than zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the signed integer value is greater than zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGT Vd.2S,Vn.2S,#0
+

Argument Preparation

a → Vn.2S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare signed Greater than zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the signed integer value is greater than zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGT Vd.4S,Vn.4S,#0
+

Argument Preparation

a → Vn.4S 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare signed Greater than zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the signed integer value is greater than zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGT Dd,Dn,#0
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare signed Greater than zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the signed integer value is greater than zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGT Vd.2D,Vn.2D,#0
+

Argument Preparation

a → Vn.2D 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Compare Greater than zero (vector). This instruction reads each floating-point value in the source SIMD&FP register and if the value is greater than zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMGT Vd.2S,Vn.2S,#0
+

Argument Preparation

a → Vn.2S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) zero = FPZero('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element, zero, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element, zero, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element, zero, FPCR);
+        when CompareOp_LE test_passed = FPCompareGE(zero, element, FPCR);
+        when CompareOp_LT test_passed = FPCompareGT(zero, element, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64
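
A minimal Rust sketch of the floating-point compare against zero, assuming two 32-bit lanes and an illustrative function name; NaN lanes fail the comparison and yield an all-zeros mask:

// Reference model of FCMGT #0: lane-wise a > 0.0 on f32.
fn fcmgt_zero_f32x2(a: [f32; 2]) -> [u32; 2] {
    let mut result = [0u32; 2];
    for i in 0..2 {
        result[i] = if a[i] > 0.0 { u32::MAX } else { 0 };
    }
    result
}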

Description

Floating-point Compare Greater than zero (vector). This instruction reads each floating-point value in the source SIMD&FP register and if the value is greater than zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMGT Vd.4S,Vn.4S,#0
+

Argument Preparation

a → Vn.4S 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) zero = FPZero('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element, zero, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element, zero, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element, zero, FPCR);
+        when CompareOp_LE test_passed = FPCompareGE(zero, element, FPCR);
+        when CompareOp_LT test_passed = FPCompareGT(zero, element, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Compare Greater than zero (vector). This instruction reads each floating-point value in the source SIMD&FP register and if the value is greater than zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMGT Dd,Dn,#0
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) zero = FPZero('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element, zero, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element, zero, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element, zero, FPCR);
+        when CompareOp_LE test_passed = FPCompareGE(zero, element, FPCR);
+        when CompareOp_LT test_passed = FPCompareGT(zero, element, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Compare Greater than zero (vector). This instruction reads each floating-point value in the source SIMD&FP register and if the value is greater than zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMGT Vd.2D,Vn.2D,#0
+

Argument Preparation

a → Vn.2D 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) zero = FPZero('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element, zero, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element, zero, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element, zero, FPCR);
+        when CompareOp_LE test_passed = FPCompareGE(zero, element, FPCR);
+        when CompareOp_LT test_passed = FPCompareGT(zero, element, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare signed Greater than zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the signed integer value is greater than zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGT Dd,Dn,#0
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Compare Greater than zero (vector). This instruction reads each floating-point value in the source SIMD&FP register and if the value is greater than zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMGT Sd,Sn,#0
+

Argument Preparation

a → Sn 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) zero = FPZero('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element, zero, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element, zero, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element, zero, FPCR);
+        when CompareOp_LE test_passed = FPCompareGE(zero, element, FPCR);
+        when CompareOp_LT test_passed = FPCompareGT(zero, element, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Compare Greater than zero (vector). This instruction reads each floating-point value in the source SIMD&FP register and if the value is greater than zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMGT Dd,Dn,#0
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) zero = FPZero('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element, zero, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element, zero, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element, zero, FPCR);
+        when CompareOp_LE test_passed = FPCompareGE(zero, element, FPCR);
+        when CompareOp_LT test_passed = FPCompareGT(zero, element, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare signed Greater than (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first signed integer value is greater than the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGT Vd.8B,Vm.8B,Vn.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare signed Greater than (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first signed integer value is greater than the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGT Vd.16B,Vm.16B,Vn.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare signed Greater than (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first signed integer value is greater than the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGT Vd.4H,Vm.4H,Vn.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare signed Greater than (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first signed integer value is greater than the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGT Vd.8H,Vm.8H,Vn.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare signed Greater than zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the signed integer value is greater than zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGT Vd.2S,Vm.2S,Vn.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare signed Greater than zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the signed integer value is greater than zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGT Vd.4S,Vm.4S,Vn.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare unsigned Higher (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first unsigned integer value is greater than the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMHI Vd.8B,Vm.8B,Vn.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
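
A brief Rust sketch, under the assumption that the unsigned-higher form above corresponds to the core::arch::aarch64 intrinsic vcgt_u8 (the wrapper function below is illustrative):

#[cfg(target_arch = "aarch64")]
unsafe fn cmhi_sketch(a: core::arch::aarch64::uint8x8_t,
                      b: core::arch::aarch64::uint8x8_t)
                      -> core::arch::aarch64::uint8x8_t {
    use core::arch::aarch64::*;
    // Per-lane unsigned a > b; every bit of a passing lane is set to one.
    vcgt_u8(a, b)
}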

Description

Compare unsigned Higher (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first unsigned integer value is greater than the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMHI Vd.16B,Vm.16B,Vn.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare unsigned Higher (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first unsigned integer value is greater than the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMHI Vd.4H,Vm.4H,Vn.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare unsigned Higher (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first unsigned integer value is greater than the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMHI Vd.8H,Vm.8H,Vn.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare unsigned Higher (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first unsigned integer value is greater than the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMHI Vd.2S,Vm.2S,Vn.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare unsigned Higher (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first unsigned integer value is greater than the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMHI Vd.4S,Vm.4S,Vn.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Floating-point Compare Greater than zero (vector). This instruction reads each floating-point value in the source SIMD&FP register and if the value is greater than zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMGT Vd.2S,Vm.2S,Vn.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) zero = FPZero('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element, zero, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element, zero, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element, zero, FPCR);
+        when CompareOp_LE test_passed = FPCompareGE(zero, element, FPCR);
+        when CompareOp_LT test_passed = FPCompareGT(zero, element, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
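
For the greater-than-zero comparison described above, a hedged sketch: core::arch::aarch64 is assumed here to expose it as vcgtz_f32 (the two-register FCMGT has a separate vcgt_f32 wrapper).

#[cfg(target_arch = "aarch64")]
unsafe fn fcmgt_zero_sketch(v: core::arch::aarch64::float32x2_t)
                            -> core::arch::aarch64::uint32x2_t {
    use core::arch::aarch64::*;
    // Per-lane v > 0.0; a NaN lane compares false and produces an all-zero lane.
    vcgtz_f32(v)
}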

Description

Floating-point Compare Greater than zero (vector). This instruction reads each floating-point value in the source SIMD&FP register and if the value is greater than zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMGT Vd.4S,Vm.4S,Vn.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) zero = FPZero('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element, zero, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element, zero, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element, zero, FPCR);
+        when CompareOp_LE test_passed = FPCompareGE(zero, element, FPCR);
+        when CompareOp_LT test_passed = FPCompareGT(zero, element, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare signed Greater than zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the signed integer value is greater than zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGT Dd,Dm,Dn
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64
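
For the scalar D-register form above, a hedged sketch: current core::arch::aarch64 is assumed to expose it as vcgtd_s64, taking and returning plain 64-bit integers rather than vector types.

#[cfg(target_arch = "aarch64")]
unsafe fn cmgt_scalar_sketch(a: i64, b: i64) -> u64 {
    use core::arch::aarch64::*;
    // Signed a > b on the single 64-bit element: u64::MAX if true, 0 otherwise.
    vcgtd_s64(a, b)
}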

Description

Compare signed Greater than zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the signed integer value is greater than zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGT Vd.2D,Vm.2D,Vn.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare unsigned Higher (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first unsigned integer value is greater than the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMHI Dd,Dm,Dn
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare unsigned Higher (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first unsigned integer value is greater than the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMHI Vd.2D,Vm.2D,Vn.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Compare Greater than zero (vector). This instruction reads each floating-point value in the source SIMD&FP register and if the value is greater than zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMGT Dd,Dm,Dn
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) zero = FPZero('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element, zero, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element, zero, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element, zero, FPCR);
+        when CompareOp_LE test_passed = FPCompareGE(zero, element, FPCR);
+        when CompareOp_LT test_passed = FPCompareGT(zero, element, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Compare Greater than zero (vector). This instruction reads each floating-point value in the source SIMD&FP register and if the value is greater than zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMGT Vd.2D,Vm.2D,Vn.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) zero = FPZero('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element, zero, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element, zero, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element, zero, FPCR);
+        when CompareOp_LE test_passed = FPCompareGE(zero, element, FPCR);
+        when CompareOp_LT test_passed = FPCompareGT(zero, element, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare signed Greater than zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the signed integer value is greater than zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMGT Dd,Dm,Dn
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare unsigned Higher (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first unsigned integer value is greater than the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMHI Dd,Dm,Dn
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    test_passed = if cmp_eq then element1 >= element2 else element1 > element2;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Compare Greater than zero (vector). This instruction reads each floating-point value in the source SIMD&FP register and if the value is greater than zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMGT Sd,Sm,Sn
+

Argument Preparation

a → Sn 
+b → Sm

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) zero = FPZero('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element, zero, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element, zero, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element, zero, FPCR);
+        when CompareOp_LE test_passed = FPCompareGE(zero, element, FPCR);
+        when CompareOp_LT test_passed = FPCompareGT(zero, element, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Compare Greater than zero (vector). This instruction reads each floating-point value in the source SIMD&FP register and if the value is greater than zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMGT Dd,Dm,Dn
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) zero = FPZero('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element, zero, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element, zero, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element, zero, FPCR);
+        when CompareOp_LE test_passed = FPCompareGE(zero, element, FPCR);
+        when CompareOp_LT test_passed = FPCompareGT(zero, element, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare signed Less than zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the signed integer value is less than zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMLT Vd.8B,Vn.8B,#0
+

Argument Preparation

a → Vn.8B 

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64
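
A short Rust counterpart for the less-than-zero form above, assuming the core::arch::aarch64 intrinsic vcltz_s8 is the relevant wrapper:

#[cfg(target_arch = "aarch64")]
unsafe fn cmlt_zero_sketch(v: core::arch::aarch64::int8x8_t)
                           -> core::arch::aarch64::uint8x8_t {
    use core::arch::aarch64::*;
    // Per-lane signed v < 0, i.e. each lane's sign bit broadcast to all 8 bits.
    vcltz_s8(v)
}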

Description

Compare signed Less than zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the signed integer value is less than zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMLT Vd.16B,Vn.16B,#0
+

Argument Preparation

a → Vn.16B 

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare signed Less than zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the signed integer value is less than zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMLT Vd.4H,Vn.4H,#0
+

Argument Preparation

a → Vn.4H 

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare signed Less than zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the signed integer value is less than zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMLT Vd.8H,Vn.8H,#0
+

Argument Preparation

a → Vn.8H 

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare signed Less than zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the signed integer value is less than zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMLT Vd.2S,Vn.2S,#0
+

Argument Preparation

a → Vn.2S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare signed Less than zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the signed integer value is less than zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMLT Vd.4S,Vn.4S,#0
+

Argument Preparation

a → Vn.4S 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare signed Less than zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the signed integer value is less than zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMLT Dd,Dn,#0
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare signed Less than zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the signed integer value is less than zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMLT Vd.2D,Vn.2D,#0
+

Argument Preparation

a → Vn.2D 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Compare Less than zero (vector). This instruction reads each floating-point value in the source SIMD&FP register and if the value is less than zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMLT Vd.2S,Vn.2S,#0
+

Argument Preparation

a → Vn.2S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) zero = FPZero('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element, zero, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element, zero, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element, zero, FPCR);
+        when CompareOp_LE test_passed = FPCompareGE(zero, element, FPCR);
+        when CompareOp_LT test_passed = FPCompareGT(zero, element, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64
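
A matching floating-point sketch, assuming vcltz_f32 in core::arch::aarch64 covers the FCMLT-against-zero form above:

#[cfg(target_arch = "aarch64")]
unsafe fn fcmlt_zero_sketch(v: core::arch::aarch64::float32x2_t)
                            -> core::arch::aarch64::uint32x2_t {
    use core::arch::aarch64::*;
    // Per-lane v < 0.0; NaN lanes compare false, and -0.0 is not less than +0.0.
    vcltz_f32(v)
}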

Description

Floating-point Compare Less than zero (vector). This instruction reads each floating-point value in the source SIMD&FP register and if the value is less than zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMLT Vd.4S,Vn.4S,#0
+

Argument Preparation

a → Vn.4S 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) zero = FPZero('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element, zero, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element, zero, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element, zero, FPCR);
+        when CompareOp_LE test_passed = FPCompareGE(zero, element, FPCR);
+        when CompareOp_LT test_passed = FPCompareGT(zero, element, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Compare Less than zero (vector). This instruction reads each floating-point value in the source SIMD&FP register and if the value is less than zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMLT Dd,Dn,#0
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) zero = FPZero('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element, zero, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element, zero, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element, zero, FPCR);
+        when CompareOp_LE test_passed = FPCompareGE(zero, element, FPCR);
+        when CompareOp_LT test_passed = FPCompareGT(zero, element, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Compare Less than zero (vector). This instruction reads each floating-point value in the source SIMD&FP register and if the value is less than zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMLT Vd.2D,Vn.2D,#0
+

Argument Preparation

a → Vn.2D 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) zero = FPZero('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element, zero, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element, zero, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element, zero, FPCR);
+        when CompareOp_LE test_passed = FPCompareGE(zero, element, FPCR);
+        when CompareOp_LT test_passed = FPCompareGT(zero, element, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare signed Less than zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the signed integer value is less than zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMLT Dd,Dn,#0
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    case comparison of
+        when CompareOp_GT test_passed = element > 0;
+        when CompareOp_GE test_passed = element >= 0;
+        when CompareOp_EQ test_passed = element == 0;
+        when CompareOp_LE test_passed = element <= 0;
+        when CompareOp_LT test_passed = element < 0;
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Compare Less than zero (vector). This instruction reads each floating-point value in the source SIMD&FP register and if the value is less than zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMLT Sd,Sn,#0
+

Argument Preparation

a → Sn 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) zero = FPZero('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element, zero, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element, zero, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element, zero, FPCR);
+        when CompareOp_LE test_passed = FPCompareGE(zero, element, FPCR);
+        when CompareOp_LT test_passed = FPCompareGT(zero, element, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Compare Less than zero (vector). This instruction reads each floating-point value in the source SIMD&FP register and if the value is less than zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FCMLT Dd,Dn,#0
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) zero = FPZero('0');
+bits(esize) element;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    case comparison of
+        when CompareOp_GT test_passed = FPCompareGT(element, zero, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element, zero, FPCR);
+        when CompareOp_EQ test_passed = FPCompareEQ(element, zero, FPCR);
+        when CompareOp_LE test_passed = FPCompareGE(zero, element, FPCR);
+        when CompareOp_LT test_passed = FPCompareGT(zero, element, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Absolute Compare Greater than or Equal (vector). This instruction compares the absolute value of each floating-point value in the first source SIMD&FP register with the absolute value of the corresponding floating-point value in the second source SIMD&FP register and if the first value is greater than or equal to the second value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FACGE Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if abs then
+        element1 = FPAbs(element1);
+        element2 = FPAbs(element2);
+    case cmp of
+        when CompareOp_EQ test_passed = FPCompareEQ(element1, element2, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element1, element2, FPCR);
+        when CompareOp_GT test_passed = FPCompareGT(element1, element2, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
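
As a sketch of the absolute-compare form above (assuming vcage_f32 is the corresponding core::arch::aarch64 intrinsic):

#[cfg(target_arch = "aarch64")]
unsafe fn facge_sketch(a: core::arch::aarch64::float32x2_t,
                       b: core::arch::aarch64::float32x2_t)
                       -> core::arch::aarch64::uint32x2_t {
    use core::arch::aarch64::*;
    // Per-lane |a| >= |b|, i.e. the comparison is made with both sign bits
    // cleared first; a NaN in either lane makes that lane compare false.
    vcage_f32(a, b)
}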

Description

Floating-point Absolute Compare Greater than or Equal (vector). This instruction compares the absolute value of each floating-point value in the first source SIMD&FP register with the absolute value of the corresponding floating-point value in the second source SIMD&FP register and if the first value is greater than or equal to the second value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FACGE Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if abs then
+        element1 = FPAbs(element1);
+        element2 = FPAbs(element2);
+    case cmp of
+        when CompareOp_EQ test_passed = FPCompareEQ(element1, element2, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element1, element2, FPCR);
+        when CompareOp_GT test_passed = FPCompareGT(element1, element2, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Floating-point Absolute Compare Greater than or Equal (vector). This instruction compares the absolute value of each floating-point value in the first source SIMD&FP register with the absolute value of the corresponding floating-point value in the second source SIMD&FP register and if the first value is greater than or equal to the second value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FACGE Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if abs then
+        element1 = FPAbs(element1);
+        element2 = FPAbs(element2);
+    case cmp of
+        when CompareOp_EQ test_passed = FPCompareEQ(element1, element2, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element1, element2, FPCR);
+        when CompareOp_GT test_passed = FPCompareGT(element1, element2, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Absolute Compare Greater than or Equal (vector). This instruction compares the absolute value of each floating-point value in the first source SIMD&FP register with the absolute value of the corresponding floating-point value in the second source SIMD&FP register and if the first value is greater than or equal to the second value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FACGE Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if abs then
+        element1 = FPAbs(element1);
+        element2 = FPAbs(element2);
+    case cmp of
+        when CompareOp_EQ test_passed = FPCompareEQ(element1, element2, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element1, element2, FPCR);
+        when CompareOp_GT test_passed = FPCompareGT(element1, element2, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Absolute Compare Greater than or Equal (vector). This instruction compares the absolute value of each floating-point value in the first source SIMD&FP register with the absolute value of the corresponding floating-point value in the second source SIMD&FP register and if the first value is greater than or equal to the second value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FACGE Sd,Sn,Sm
+

Argument Preparation

a → Sn 
+b → Sm

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if abs then
+        element1 = FPAbs(element1);
+        element2 = FPAbs(element2);
+    case cmp of
+        when CompareOp_EQ test_passed = FPCompareEQ(element1, element2, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element1, element2, FPCR);
+        when CompareOp_GT test_passed = FPCompareGT(element1, element2, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64
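
The single-precision scalar form above has a scalar counterpart as well; vcages_f32 is assumed here, returning u32::MAX or 0 instead of a vector:

#[cfg(target_arch = "aarch64")]
unsafe fn facge_scalar_sketch(a: f32, b: f32) -> u32 {
    use core::arch::aarch64::*;
    // |a| >= |b| on the single element; all result bits set when the test passes.
    vcages_f32(a, b)
}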

Description

Floating-point Absolute Compare Greater than or Equal (vector). This instruction compares the absolute value of each floating-point value in the first source SIMD&FP register with the absolute value of the corresponding floating-point value in the second source SIMD&FP register and if the first value is greater than or equal to the second value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FACGE Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if abs then
+        element1 = FPAbs(element1);
+        element2 = FPAbs(element2);
+    case cmp of
+        when CompareOp_EQ test_passed = FPCompareEQ(element1, element2, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element1, element2, FPCR);
+        when CompareOp_GT test_passed = FPCompareGT(element1, element2, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Absolute Compare Greater than or Equal (vector). This instruction compares the absolute value of each floating-point value in the first source SIMD&FP register with the absolute value of the corresponding floating-point value in the second source SIMD&FP register and if the first value is greater than or equal to the second value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FACGE Vd.2S,Vm.2S,Vn.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if abs then
+        element1 = FPAbs(element1);
+        element2 = FPAbs(element2);
+    case cmp of
+        when CompareOp_EQ test_passed = FPCompareEQ(element1, element2, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element1, element2, FPCR);
+        when CompareOp_GT test_passed = FPCompareGT(element1, element2, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Floating-point Absolute Compare Greater than or Equal (vector). This instruction compares the absolute value of each floating-point value in the first source SIMD&FP register with the absolute value of the corresponding floating-point value in the second source SIMD&FP register and if the first value is greater than or equal to the second value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FACGE Vd.4S,Vm.4S,Vn.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if abs then
+        element1 = FPAbs(element1);
+        element2 = FPAbs(element2);
+    case cmp of
+        when CompareOp_EQ test_passed = FPCompareEQ(element1, element2, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element1, element2, FPCR);
+        when CompareOp_GT test_passed = FPCompareGT(element1, element2, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Floating-point Absolute Compare Greater than or Equal (vector). This instruction compares the absolute value of each floating-point value in the first source SIMD&FP register with the absolute value of the corresponding floating-point value in the second source SIMD&FP register and if the first value is greater than or equal to the second value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FACGE Dd,Dm,Dn
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if abs then
+        element1 = FPAbs(element1);
+        element2 = FPAbs(element2);
+    case cmp of
+        when CompareOp_EQ test_passed = FPCompareEQ(element1, element2, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element1, element2, FPCR);
+        when CompareOp_GT test_passed = FPCompareGT(element1, element2, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Absolute Compare Greater than or Equal (vector). This instruction compares the absolute value of each floating-point value in the first source SIMD&FP register with the absolute value of the corresponding floating-point value in the second source SIMD&FP register and if the first value is greater than or equal to the second value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FACGE Vd.2D,Vm.2D,Vn.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if abs then
+        element1 = FPAbs(element1);
+        element2 = FPAbs(element2);
+    case cmp of
+        when CompareOp_EQ test_passed = FPCompareEQ(element1, element2, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element1, element2, FPCR);
+        when CompareOp_GT test_passed = FPCompareGT(element1, element2, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Absolute Compare Greater than or Equal (vector). This instruction compares the absolute value of each floating-point value in the first source SIMD&FP register with the absolute value of the corresponding floating-point value in the second source SIMD&FP register and if the first value is greater than or equal to the second value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FACGE Sd,Sm,Sn
+

Argument Preparation

a → Sn 
+b → Sm

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if abs then
+        element1 = FPAbs(element1);
+        element2 = FPAbs(element2);
+    case cmp of
+        when CompareOp_EQ test_passed = FPCompareEQ(element1, element2, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element1, element2, FPCR);
+        when CompareOp_GT test_passed = FPCompareGT(element1, element2, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Absolute Compare Greater than or Equal (vector). This instruction compares the absolute value of each floating-point value in the first source SIMD&FP register with the absolute value of the corresponding floating-point value in the second source SIMD&FP register and if the first value is greater than or equal to the second value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FACGE Dd,Dm,Dn
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if abs then
+        element1 = FPAbs(element1);
+        element2 = FPAbs(element2);
+    case cmp of
+        when CompareOp_EQ test_passed = FPCompareEQ(element1, element2, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element1, element2, FPCR);
+        when CompareOp_GT test_passed = FPCompareGT(element1, element2, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Absolute Compare Greater than (vector). This instruction compares the absolute value of each vector element in the first source SIMD&FP register with the absolute value of the corresponding vector element in the second source SIMD&FP register and if the first value is greater than the second value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FACGT Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if abs then
+        element1 = FPAbs(element1);
+        element2 = FPAbs(element2);
+    case cmp of
+        when CompareOp_EQ test_passed = FPCompareEQ(element1, element2, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element1, element2, FPCR);
+        when CompareOp_GT test_passed = FPCompareGT(element1, element2, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
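
A minimal scalar sketch of the per-lane FACGT step described above, in Rust, assuming 32-bit lanes; the function name is illustrative, and FPCR rounding, exception, and NaN-signalling behaviour are not modelled.

fn facgt_lane(a: f32, b: f32) -> u32 {
    // |a| > |b| -> all ones, otherwise all zeros, for one 32-bit lane
    if a.abs() > b.abs() { u32::MAX } else { 0 }
}

fn main() {
    assert_eq!(facgt_lane(-3.0, 2.0), u32::MAX); // |-3.0| > |2.0|
    assert_eq!(facgt_lane(1.0, -1.0), 0);        // equal magnitudes are not greater
}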

Description

Floating-point Absolute Compare Greater than (vector). This instruction compares the absolute value of each vector element in the first source SIMD&FP register with the absolute value of the corresponding vector element in the second source SIMD&FP register and if the first value is greater than the second value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FACGT Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if abs then
+        element1 = FPAbs(element1);
+        element2 = FPAbs(element2);
+    case cmp of
+        when CompareOp_EQ test_passed = FPCompareEQ(element1, element2, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element1, element2, FPCR);
+        when CompareOp_GT test_passed = FPCompareGT(element1, element2, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Floating-point Absolute Compare Greater than (vector). This instruction compares the absolute value of each vector element in the first source SIMD&FP register with the absolute value of the corresponding vector element in the second source SIMD&FP register and if the first value is greater than the second value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FACGT Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if abs then
+        element1 = FPAbs(element1);
+        element2 = FPAbs(element2);
+    case cmp of
+        when CompareOp_EQ test_passed = FPCompareEQ(element1, element2, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element1, element2, FPCR);
+        when CompareOp_GT test_passed = FPCompareGT(element1, element2, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Absolute Compare Greater than (vector). This instruction compares the absolute value of each vector element in the first source SIMD&FP register with the absolute value of the corresponding vector element in the second source SIMD&FP register and if the first value is greater than the second value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FACGT Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if abs then
+        element1 = FPAbs(element1);
+        element2 = FPAbs(element2);
+    case cmp of
+        when CompareOp_EQ test_passed = FPCompareEQ(element1, element2, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element1, element2, FPCR);
+        when CompareOp_GT test_passed = FPCompareGT(element1, element2, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Absolute Compare Greater than (vector). This instruction compares the absolute value of each vector element in the first source SIMD&FP register with the absolute value of the corresponding vector element in the second source SIMD&FP register and if the first value is greater than the second value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FACGT Sd,Sn,Sm
+

Argument Preparation

a → Sn 
+b → Sm

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if abs then
+        element1 = FPAbs(element1);
+        element2 = FPAbs(element2);
+    case cmp of
+        when CompareOp_EQ test_passed = FPCompareEQ(element1, element2, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element1, element2, FPCR);
+        when CompareOp_GT test_passed = FPCompareGT(element1, element2, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Absolute Compare Greater than (vector). This instruction compares the absolute value of each vector element in the first source SIMD&FP register with the absolute value of the corresponding vector element in the second source SIMD&FP register and if the first value is greater than the second value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FACGT Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if abs then
+        element1 = FPAbs(element1);
+        element2 = FPAbs(element2);
+    case cmp of
+        when CompareOp_EQ test_passed = FPCompareEQ(element1, element2, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element1, element2, FPCR);
+        when CompareOp_GT test_passed = FPCompareGT(element1, element2, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Absolute Compare Greater than (vector). This instruction compares the absolute value of each vector element in the first source SIMD&FP register with the absolute value of the corresponding vector element in the second source SIMD&FP register and if the first value is greater than the second value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FACGT Vd.2S,Vm.2S,Vn.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if abs then
+        element1 = FPAbs(element1);
+        element2 = FPAbs(element2);
+    case cmp of
+        when CompareOp_EQ test_passed = FPCompareEQ(element1, element2, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element1, element2, FPCR);
+        when CompareOp_GT test_passed = FPCompareGT(element1, element2, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Floating-point Absolute Compare Greater than (vector). This instruction compares the absolute value of each vector element in the first source SIMD&FP register with the absolute value of the corresponding vector element in the second source SIMD&FP register and if the first value is greater than the second value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FACGT Vd.4S,Vm.4S,Vn.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if abs then
+        element1 = FPAbs(element1);
+        element2 = FPAbs(element2);
+    case cmp of
+        when CompareOp_EQ test_passed = FPCompareEQ(element1, element2, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element1, element2, FPCR);
+        when CompareOp_GT test_passed = FPCompareGT(element1, element2, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Floating-point Absolute Compare Greater than (vector). This instruction compares the absolute value of each vector element in the first source SIMD&FP register with the absolute value of the corresponding vector element in the second source SIMD&FP register and if the first value is greater than the second value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FACGT Dd,Dm,Dn
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if abs then
+        element1 = FPAbs(element1);
+        element2 = FPAbs(element2);
+    case cmp of
+        when CompareOp_EQ test_passed = FPCompareEQ(element1, element2, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element1, element2, FPCR);
+        when CompareOp_GT test_passed = FPCompareGT(element1, element2, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Absolute Compare Greater than (vector). This instruction compares the absolute value of each vector element in the first source SIMD&FP register with the absolute value of the corresponding vector element in the second source SIMD&FP register and if the first value is greater than the second value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FACGT Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if abs then
+        element1 = FPAbs(element1);
+        element2 = FPAbs(element2);
+    case cmp of
+        when CompareOp_EQ test_passed = FPCompareEQ(element1, element2, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element1, element2, FPCR);
+        when CompareOp_GT test_passed = FPCompareGT(element1, element2, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Absolute Compare Greater than (vector). This instruction compares the absolute value of each vector element in the first source SIMD&FP register with the absolute value of the corresponding vector element in the second source SIMD&FP register and if the first value is greater than the second value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FACGT Sd,Sm,Sn
+

Argument Preparation

a → Sn 
+b → Sm

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if abs then
+        element1 = FPAbs(element1);
+        element2 = FPAbs(element2);
+    case cmp of
+        when CompareOp_EQ test_passed = FPCompareEQ(element1, element2, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element1, element2, FPCR);
+        when CompareOp_GT test_passed = FPCompareGT(element1, element2, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Absolute Compare Greater than (vector). This instruction compares the absolute value of each vector element in the first source SIMD&FP register with the absolute value of the corresponding vector element in the second source SIMD&FP register and if the first value is greater than the second value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

FACGT Dd,Dm,Dn
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if abs then
+        element1 = FPAbs(element1);
+        element2 = FPAbs(element2);
+    case cmp of
+        when CompareOp_EQ test_passed = FPCompareEQ(element1, element2, FPCR);
+        when CompareOp_GE test_passed = FPCompareGE(element1, element2, FPCR);
+        when CompareOp_GT test_passed = FPCompareGT(element1, element2, FPCR);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare bitwise Test bits nonzero (vector). This instruction reads each vector element in the first source SIMD&FP register, performs an AND with the corresponding vector element in the second source SIMD&FP register, and if the result is not zero, sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMTST Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if and_test then
+        test_passed = !IsZero(element1 AND element2);
+    else
+        test_passed = (element1 == element2);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
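
For CMTST the pseudocode takes the and_test branch, so each lane reduces to a bitwise AND tested for being non-zero. A minimal per-lane sketch in Rust, assuming 8-bit lanes; the function name is illustrative.

fn cmtst_lane(a: u8, b: u8) -> u8 {
    // all ones if a AND b has any bit set, otherwise all zeros
    if a & b != 0 { u8::MAX } else { 0 }
}

fn main() {
    assert_eq!(cmtst_lane(0b1010_0000, 0b0010_0000), 0xFF); // shared bit
    assert_eq!(cmtst_lane(0b1010_0000, 0b0101_0101), 0x00); // no common bits
}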

Description

Compare bitwise Test bits nonzero (vector). This instruction reads each vector element in the first source SIMD&FP register, performs an AND with the corresponding vector element in the second source SIMD&FP register, and if the result is not zero, sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMTST Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if and_test then
+        test_passed = !IsZero(element1 AND element2);
+    else
+        test_passed = (element1 == element2);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare bitwise Test bits nonzero (vector). This instruction reads each vector element in the first source SIMD&FP register, performs an AND with the corresponding vector element in the second source SIMD&FP register, and if the result is not zero, sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMTST Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if and_test then
+        test_passed = !IsZero(element1 AND element2);
+    else
+        test_passed = (element1 == element2);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare bitwise Test bits nonzero (vector). This instruction reads each vector element in the first source SIMD&FP register, performs an AND with the corresponding vector element in the second source SIMD&FP register, and if the result is not zero, sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMTST Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if and_test then
+        test_passed = !IsZero(element1 AND element2);
+    else
+        test_passed = (element1 == element2);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare bitwise Test bits nonzero (vector). This instruction reads each vector element in the first source SIMD&FP register, performs an AND with the corresponding vector element in the second source SIMD&FP register, and if the result is not zero, sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMTST Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if and_test then
+        test_passed = !IsZero(element1 AND element2);
+    else
+        test_passed = (element1 == element2);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare bitwise Test bits nonzero (vector). This instruction reads each vector element in the first source SIMD&FP register, performs an AND with the corresponding vector element in the second source SIMD&FP register, and if the result is not zero, sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMTST Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if and_test then
+        test_passed = !IsZero(element1 AND element2);
+    else
+        test_passed = (element1 == element2);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare bitwise Test bits nonzero (vector). This instruction reads each vector element in the first source SIMD&FP register, performs an AND with the corresponding vector element in the second source SIMD&FP register, and if the result is not zero, sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMTST Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if and_test then
+        test_passed = !IsZero(element1 AND element2);
+    else
+        test_passed = (element1 == element2);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare bitwise Test bits nonzero (vector). This instruction reads each vector element in the first source SIMD&FP register, performs an AND with the corresponding vector element in the second source SIMD&FP register, and if the result is not zero, sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMTST Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if and_test then
+        test_passed = !IsZero(element1 AND element2);
+    else
+        test_passed = (element1 == element2);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare bitwise Test bits nonzero (vector). This instruction reads each vector element in the first source SIMD&FP register, performs an AND with the corresponding vector element in the second source SIMD&FP register, and if the result is not zero, sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMTST Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if and_test then
+        test_passed = !IsZero(element1 AND element2);
+    else
+        test_passed = (element1 == element2);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare bitwise Test bits nonzero (vector). This instruction reads each vector element in the first source SIMD&FP register, performs an AND with the corresponding vector element in the second source SIMD&FP register, and if the result is not zero, sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMTST Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if and_test then
+        test_passed = !IsZero(element1 AND element2);
+    else
+        test_passed = (element1 == element2);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare bitwise Test bits nonzero (vector). This instruction reads each vector element in the first source SIMD&FP register, performs an AND with the corresponding vector element in the second source SIMD&FP register, and if the result is not zero, sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMTST Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if and_test then
+        test_passed = !IsZero(element1 AND element2);
+    else
+        test_passed = (element1 == element2);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare bitwise Test bits nonzero (vector). This instruction reads each vector element in the first source SIMD&FP register, performs an AND with the corresponding vector element in the second source SIMD&FP register, and if the result is not zero, sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMTST Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if and_test then
+        test_passed = !IsZero(element1 AND element2);
+    else
+        test_passed = (element1 == element2);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare bitwise Test bits nonzero (vector). This instruction reads each vector element in the first source SIMD&FP register, performs an AND with the corresponding vector element in the second source SIMD&FP register, and if the result is not zero, sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMTST Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if and_test then
+        test_passed = !IsZero(element1 AND element2);
+    else
+        test_passed = (element1 == element2);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare bitwise Test bits nonzero (vector). This instruction reads each vector element in the first source SIMD&FP register, performs an AND with the corresponding vector element in the second source SIMD&FP register, and if the result is not zero, sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMTST Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if and_test then
+        test_passed = !IsZero(element1 AND element2);
+    else
+        test_passed = (element1 == element2);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Compare bitwise Test bits nonzero (vector). This instruction reads each vector element in the first source SIMD&FP register, performs an AND with the corresponding vector element in the second source SIMD&FP register, and if the result is not zero, sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMTST Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if and_test then
+        test_passed = !IsZero(element1 AND element2);
+    else
+        test_passed = (element1 == element2);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare bitwise Test bits nonzero (vector). This instruction reads each vector element in the first source SIMD&FP register, performs an AND with the corresponding vector element in the second source SIMD&FP register, and if the result is not zero, sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMTST Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if and_test then
+        test_passed = !IsZero(element1 AND element2);
+    else
+        test_passed = (element1 == element2);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare bitwise Test bits nonzero (vector). This instruction reads each vector element in the first source SIMD&FP register, performs an AND with the corresponding vector element in the second source SIMD&FP register, and if the result is not zero, sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMTST Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if and_test then
+        test_passed = !IsZero(element1 AND element2);
+    else
+        test_passed = (element1 == element2);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare bitwise Test bits nonzero (vector). This instruction reads each vector element in the first source SIMD&FP register, performs an AND with the corresponding vector element in the second source SIMD&FP register, and if the result is not zero, sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMTST Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if and_test then
+        test_passed = !IsZero(element1 AND element2);
+    else
+        test_passed = (element1 == element2);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare bitwise Test bits nonzero (vector). This instruction reads each vector element in the first source SIMD&FP register, performs an AND with the corresponding vector element in the second source SIMD&FP register, and if the result is not zero, sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMTST Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if and_test then
+        test_passed = !IsZero(element1 AND element2);
+    else
+        test_passed = (element1 == element2);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A32/A64

Description

Compare bitwise Test bits nonzero (vector). This instruction reads each vector element in the first source SIMD&FP register, performs an AND with the corresponding vector element in the second source SIMD&FP register, and if the result is not zero, sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMTST Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if and_test then
+        test_passed = !IsZero(element1 AND element2);
+    else
+        test_passed = (element1 == element2);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A32/A64

Description

Compare bitwise Test bits nonzero (vector). This instruction reads each vector element in the first source SIMD&FP register, performs an AND with the corresponding vector element in the second source SIMD&FP register, and if the result is not zero, sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMTST Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if and_test then
+        test_passed = !IsZero(element1 AND element2);
+    else
+        test_passed = (element1 == element2);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Compare bitwise Test bits nonzero (vector). This instruction reads each vector element in the first source SIMD&FP register, performs an AND with the corresponding vector element in the second source SIMD&FP register, and if the result is not zero, sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

+

A64 Instruction

CMTST Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+boolean test_passed;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if and_test then
+        test_passed = !IsZero(element1 AND element2);
+    else
+        test_passed = (element1 == element2);
+    Elem[result, e, esize] = if test_passed then Ones() else Zeros();
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Absolute Difference. This instruction subtracts the elements of the vector of the second source SIMD&FP register from the corresponding elements of the first source SIMD&FP register, places the absolute values of the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SABD Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+bits(esize) absdiff;
+
+result = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    absdiff = Abs(element1-element2)<esize-1:0>;
+    Elem[result, e, esize] = Elem[result, e, esize] + absdiff;
+V[d] = result;
+

Supported architectures

v7/A32/A64
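
The SABD pseudocode above is shared with the accumulating form (SABA); for SABD itself accumulate is false, so each lane is just the absolute difference of the signed inputs truncated to the lane width (<esize-1:0>). A minimal per-lane sketch in Rust for 8-bit lanes; the function name is illustrative and the accumulate path is not modelled.

fn sabd_lane(a: i8, b: i8) -> u8 {
    // widen, take |a - b|, then truncate back to the 8-bit lane
    ((a as i16) - (b as i16)).unsigned_abs() as u8
}

fn main() {
    assert_eq!(sabd_lane(5, -3), 8);
    assert_eq!(sabd_lane(-128, 127), 255); // the full-range difference still fits the lane
}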

Description

Signed Absolute Difference. This instruction subtracts the elements of the vector of the second source SIMD&FP register from the corresponding elements of the first source SIMD&FP register, places the absolute values of the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SABD Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+bits(esize) absdiff;
+
+result = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    absdiff = Abs(element1-element2)<esize-1:0>;
+    Elem[result, e, esize] = Elem[result, e, esize] + absdiff;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Absolute Difference. This instruction subtracts the elements of the vector of the second source SIMD&FP register from the corresponding elements of the first source SIMD&FP register, places the absolute values of the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SABD Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+bits(esize) absdiff;
+
+result = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    absdiff = Abs(element1-element2)<esize-1:0>;
+    Elem[result, e, esize] = Elem[result, e, esize] + absdiff;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Absolute Difference. This instruction subtracts the elements of the vector of the second source SIMD&FP register from the corresponding elements of the first source SIMD&FP register, places the absolute values of the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SABD Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+bits(esize) absdiff;
+
+result = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    absdiff = Abs(element1-element2)<esize-1:0>;
+    Elem[result, e, esize] = Elem[result, e, esize] + absdiff;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Absolute Difference. This instruction subtracts the elements of the vector of the second source SIMD&FP register from the corresponding elements of the first source SIMD&FP register, places the absolute values of the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SABD Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+bits(esize) absdiff;
+
+result = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    absdiff = Abs(element1-element2)<esize-1:0>;
+    Elem[result, e, esize] = Elem[result, e, esize] + absdiff;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Absolute Difference. This instruction subtracts the elements of the vector of the second source SIMD&FP register from the corresponding elements of the first source SIMD&FP register, places the absolute values of the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SABD Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+bits(esize) absdiff;
+
+result = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    absdiff = Abs(element1-element2)<esize-1:0>;
+    Elem[result, e, esize] = Elem[result, e, esize] + absdiff;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Absolute Difference (vector). This instruction subtracts the elements of the vector of the second source SIMD&FP register from the corresponding elements of the first source SIMD&FP register, places the absolute values of the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UABD Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+bits(esize) absdiff;
+
+result = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    absdiff = Abs(element1-element2)<esize-1:0>;
+    Elem[result, e, esize] = Elem[result, e, esize] + absdiff;
+V[d] = result;
+

Supported architectures

v7/A32/A64
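
Same per-lane shape as SABD above, with the inputs treated as unsigned. A minimal sketch in Rust for 8-bit lanes; the function name is illustrative and the accumulate path is not modelled.

fn uabd_lane(a: u8, b: u8) -> u8 {
    // absolute difference of two unsigned lanes; the result always fits the lane width
    a.abs_diff(b)
}

fn main() {
    assert_eq!(uabd_lane(10, 250), 240);
    assert_eq!(uabd_lane(250, 10), 240);
}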

Description

Unsigned Absolute Difference (vector). This instruction subtracts the elements of the vector of the second source SIMD&FP register from the corresponding elements of the first source SIMD&FP register, places the absolute values of the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UABD Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+bits(esize) absdiff;
+
+result = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    absdiff = Abs(element1-element2)<esize-1:0>;
+    Elem[result, e, esize] = Elem[result, e, esize] + absdiff;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Absolute Difference (vector). This instruction subtracts the elements of the vector of the second source SIMD&FP register from the corresponding elements of the first source SIMD&FP register, places the absolute values of the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UABD Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+bits(esize) absdiff;
+
+result = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    absdiff = Abs(element1-element2)<esize-1:0>;
+    Elem[result, e, esize] = Elem[result, e, esize] + absdiff;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Absolute Difference (vector). This instruction subtracts the elements of the vector of the second source SIMD&FP register from the corresponding elements of the first source SIMD&FP register, places the absolute values of the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UABD Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+bits(esize) absdiff;
+
+result = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    absdiff = Abs(element1-element2)<esize-1:0>;
+    Elem[result, e, esize] = Elem[result, e, esize] + absdiff;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Absolute Difference (vector). This instruction subtracts the elements of the vector of the second source SIMD&FP register from the corresponding elements of the first source SIMD&FP register, places the absolute values of the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UABD Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+bits(esize) absdiff;
+
+result = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    absdiff = Abs(element1-element2)<esize-1:0>;
+    Elem[result, e, esize] = Elem[result, e, esize] + absdiff;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Absolute Difference (vector). This instruction subtracts the elements of the vector of the second source SIMD&FP register from the corresponding elements of the first source SIMD&FP register, places the absolute values of the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UABD Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+bits(esize) absdiff;
+
+result = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    absdiff = Abs(element1-element2)<esize-1:0>;
+    Elem[result, e, esize] = Elem[result, e, esize] + absdiff;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Floating-point Absolute Difference (vector). This instruction subtracts the floating-point values in the elements of the second source SIMD&FP register, from the corresponding floating-point values in the elements of the first source SIMD&FP register, places the absolute value of each result in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FABD Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) diff;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    diff = FPSub(element1, element2, FPCR);
+    Elem[result, e, esize] = if abs then FPAbs(diff) else diff;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
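
Per lane, the FABD pseudocode above is an FPSub followed by an FPAbs. A minimal scalar sketch in Rust, assuming 32-bit lanes; the function name is illustrative and FPCR rounding and exception behaviour are not modelled.

fn fabd_lane(a: f32, b: f32) -> f32 {
    // |a - b| for one 32-bit lane
    (a - b).abs()
}

fn main() {
    assert_eq!(fabd_lane(1.25, -2.0), 3.25);
    assert_eq!(fabd_lane(-0.5, 0.5), 1.0);
}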

Description

Floating-point Absolute Difference (vector). This instruction subtracts the floating-point values in the elements of the second source SIMD&FP register, from the corresponding floating-point values in the elements of the first source SIMD&FP register, places the absolute value of each result in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FABD Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) diff;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    diff = FPSub(element1, element2, FPCR);
+    Elem[result, e, esize] = if abs then FPAbs(diff) else diff;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Floating-point Absolute Difference (vector). This instruction subtracts the floating-point values in the elements of the second source SIMD&FP register, from the corresponding floating-point values in the elements of the first source SIMD&FP register, places the absolute value of each result in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FABD Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) diff;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    diff = FPSub(element1, element2, FPCR);
+    Elem[result, e, esize] = if abs then FPAbs(diff) else diff;
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Absolute Difference (vector). This instruction subtracts the floating-point values in the elements of the second source SIMD&FP register, from the corresponding floating-point values in the elements of the first source SIMD&FP register, places the absolute value of each result in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FABD Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) diff;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    diff = FPSub(element1, element2, FPCR);
+    Elem[result, e, esize] = if abs then FPAbs(diff) else diff;
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Absolute Difference (vector). This instruction subtracts the floating-point values in the elements of the second source SIMD&FP register, from the corresponding floating-point values in the elements of the first source SIMD&FP register, places the absolute value of each result in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FABD Sd,Sn,Sm
+

Argument Preparation

a → Sn 
+b → Sm

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) diff;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    diff = FPSub(element1, element2, FPCR);
+    Elem[result, e, esize] = if abs then FPAbs(diff) else diff;
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Absolute Difference (vector). This instruction subtracts the floating-point values in the elements of the second source SIMD&FP register, from the corresponding floating-point values in the elements of the first source SIMD&FP register, places the absolute value of each result in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FABD Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) diff;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    diff = FPSub(element1, element2, FPCR);
+    Elem[result, e, esize] = if abs then FPAbs(diff) else diff;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Absolute Difference Long. This instruction subtracts the vector elements in the lower or upper half of the second source SIMD&FP register from the corresponding vector elements of the first source SIMD&FP register, places the absolute value of the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

SABDL Vd.8H,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) absdiff;
+
+result = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    absdiff = Abs(element1-element2)<2*esize-1:0>;
+    Elem[result, e, 2*esize] = Elem[result, e, 2*esize] + absdiff;
+V[d] = result;
+

Supported architectures

v7/A32/A64
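
A rough scalar picture of the widening behaviour, assuming signed 8-bit source lanes and 16-bit result lanes as in the Vd.8H form; the names and types are chosen only for illustration.

// Scalar model of SABDL: |a[i] - b[i]| on signed 8-bit lanes, widened to 16 bits.
fn sabdl_s8(a: &[i8], b: &[i8]) -> Vec<i16> {
    a.iter()
        .zip(b)
        .map(|(&x, &y)| (i16::from(x) - i16::from(y)).abs())
        .collect()
}

fn main() {
    assert_eq!(sabdl_s8(&[-128, 5], &[127, -5]), vec![255, 10]);
}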

Description

Signed Absolute Difference Long. This instruction subtracts the vector elements in the lower or upper half of the second source SIMD&FP register from the corresponding vector elements of the first source SIMD&FP register, places the absolute value of the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

SABDL Vd.4S,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) absdiff;
+
+result = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    absdiff = Abs(element1-element2)<2*esize-1:0>;
+    Elem[result, e, 2*esize] = Elem[result, e, 2*esize] + absdiff;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Absolute Difference Long. This instruction subtracts the vector elements in the lower or upper half of the second source SIMD&FP register from the corresponding vector elements of the first source SIMD&FP register, places the absolute value of the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

SABDL Vd.2D,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) absdiff;
+
+result = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    absdiff = Abs(element1-element2)<2*esize-1:0>;
+    Elem[result, e, 2*esize] = Elem[result, e, 2*esize] + absdiff;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Absolute Difference Long. This instruction subtracts the vector elements in the lower or upper half of the second source SIMD&FP register from the corresponding vector elements of the first source SIMD&FP register, places the absolute value of the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UABDL Vd.8H,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) absdiff;
+
+result = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    absdiff = Abs(element1-element2)<2*esize-1:0>;
+    Elem[result, e, 2*esize] = Elem[result, e, 2*esize] + absdiff;
+V[d] = result;
+

Supported architectures

v7/A32/A64
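
The unsigned widening form can be sketched under the same assumptions (8-bit source lanes, 16-bit results); purely illustrative, not stdarch API.

// Scalar model of UABDL: |a[i] - b[i]| on unsigned 8-bit lanes, widened to 16 bits.
fn uabdl_u8(a: &[u8], b: &[u8]) -> Vec<u16> {
    a.iter()
        .zip(b)
        .map(|(&x, &y)| u16::from(x.abs_diff(y)))
        .collect()
}

fn main() {
    assert_eq!(uabdl_u8(&[0, 200], &[255, 100]), vec![255, 100]);
}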

Description

Unsigned Absolute Difference Long. This instruction subtracts the vector elements in the lower or upper half of the second source SIMD&FP register from the corresponding vector elements of the first source SIMD&FP register, places the absolute value of the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UABDL Vd.4S,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) absdiff;
+
+result = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    absdiff = Abs(element1-element2)<2*esize-1:0>;
+    Elem[result, e, 2*esize] = Elem[result, e, 2*esize] + absdiff;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Absolute Difference Long. This instruction subtracts the vector elements in the lower or upper half of the second source SIMD&FP register from the corresponding vector elements of the first source SIMD&FP register, places the absolute value of the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UABDL Vd.2D,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) absdiff;
+
+result = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    absdiff = Abs(element1-element2)<2*esize-1:0>;
+    Elem[result, e, 2*esize] = Elem[result, e, 2*esize] + absdiff;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Absolute Difference Long. This instruction subtracts the vector elements in the lower or upper half of the second source SIMD&FP register from the corresponding vector elements of the first source SIMD&FP register, places the absolute value of the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

SABDL2 Vd.8H,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) absdiff;
+
+result = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    absdiff = Abs(element1-element2)<2*esize-1:0>;
+    Elem[result, e, 2*esize] = Elem[result, e, 2*esize] + absdiff;
+V[d] = result;
+

Supported architectures

A64

Description

Signed Absolute Difference Long. This instruction subtracts the vector elements in the lower or upper half of the second source SIMD&FP register from the corresponding vector elements of the first source SIMD&FP register, places the absolute value of the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

SABDL2 Vd.4S,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) absdiff;
+
+result = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    absdiff = Abs(element1-element2)<2*esize-1:0>;
+    Elem[result, e, 2*esize] = Elem[result, e, 2*esize] + absdiff;
+V[d] = result;
+

Supported architectures

A64

Description

Signed Absolute Difference Long. This instruction subtracts the vector elements in the lower or upper half of the second source SIMD&FP register from the corresponding vector elements of the first source SIMD&FP register, places the absolute value of the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

SABDL2 Vd.2D,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) absdiff;
+
+result = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    absdiff = Abs(element1-element2)<2*esize-1:0>;
+    Elem[result, e, 2*esize] = Elem[result, e, 2*esize] + absdiff;
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Absolute Difference Long. This instruction subtracts the vector elements in the lower or upper half of the second source SIMD&FP register from the corresponding vector elements of the first source SIMD&FP register, places the absolute value of the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UABDL2 Vd.8H,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) absdiff;
+
+result = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    absdiff = Abs(element1-element2)<2*esize-1:0>;
+    Elem[result, e, 2*esize] = Elem[result, e, 2*esize] + absdiff;
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Absolute Difference Long. This instruction subtracts the vector elements in the lower or upper half of the second source SIMD&FP register from the corresponding vector elements of the first source SIMD&FP register, places the absolute value of the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UABDL2 Vd.4S,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) absdiff;
+
+result = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    absdiff = Abs(element1-element2)<2*esize-1:0>;
+    Elem[result, e, 2*esize] = Elem[result, e, 2*esize] + absdiff;
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Absolute Difference Long. This instruction subtracts the vector elements in the lower or upper half of the second source SIMD&FP register from the corresponding vector elements of the first source SIMD&FP register, places the absolute value of the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UABDL2 Vd.2D,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) absdiff;
+
+result = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    absdiff = Abs(element1-element2)<2*esize-1:0>;
+    Elem[result, e, 2*esize] = Elem[result, e, 2*esize] + absdiff;
+V[d] = result;
+

Supported architectures

A64

Description

Signed Absolute difference and Accumulate. This instruction subtracts the elements of the vector of the second source SIMD&FP register from the corresponding elements of the first source SIMD&FP register, and accumulates the absolute values of the results into the elements of the vector of the destination SIMD&FP register.

+

A64 Instruction

SABA Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vd.8B 
+b → Vn.8B
+c → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+bits(esize) absdiff;
+
+result = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    absdiff = Abs(element1-element2)<esize-1:0>;
+    Elem[result, e, esize] = Elem[result, e, esize] + absdiff;
+V[d] = result;
+

Supported architectures

v7/A32/A64
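
The accumulate form updates the destination in place. Here is a scalar Rust sketch, assuming signed 8-bit lanes and using wrapping addition to mirror the modular accumulation in the pseudocode; the helper is hypothetical.

// Scalar model of SABA: acc[i] += |a[i] - b[i]| with wrapping (modular) addition.
fn saba_s8(acc: &mut [i8], a: &[i8], b: &[i8]) {
    for ((d, &x), &y) in acc.iter_mut().zip(a).zip(b) {
        let absdiff = (i16::from(x) - i16::from(y)).unsigned_abs() as u8;
        *d = d.wrapping_add(absdiff as i8);
    }
}

fn main() {
    let mut acc = [1i8, 2];
    saba_s8(&mut acc, &[5, -3], &[2, 4]);
    assert_eq!(acc, [4, 9]);
}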

Description

Signed Absolute difference and Accumulate. This instruction subtracts the elements of the vector of the second source SIMD&FP register from the corresponding elements of the first source SIMD&FP register, and accumulates the absolute values of the results into the elements of the vector of the destination SIMD&FP register.

+

A64 Instruction

SABA Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vd.16B 
+b → Vn.16B
+c → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+bits(esize) absdiff;
+
+result = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    absdiff = Abs(element1-element2)<esize-1:0>;
+    Elem[result, e, esize] = Elem[result, e, esize] + absdiff;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Absolute difference and Accumulate. This instruction subtracts the elements of the vector of the second source SIMD&FP register from the corresponding elements of the first source SIMD&FP register, and accumulates the absolute values of the results into the elements of the vector of the destination SIMD&FP register.

+

A64 Instruction

SABA Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vd.4H 
+b → Vn.4H
+c → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+bits(esize) absdiff;
+
+result = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    absdiff = Abs(element1-element2)<esize-1:0>;
+    Elem[result, e, esize] = Elem[result, e, esize] + absdiff;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Absolute difference and Accumulate. This instruction subtracts the elements of the vector of the second source SIMD&FP register from the corresponding elements of the first source SIMD&FP register, and accumulates the absolute values of the results into the elements of the vector of the destination SIMD&FP register.

+

A64 Instruction

SABA Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vd.8H 
+b → Vn.8H
+c → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+bits(esize) absdiff;
+
+result = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    absdiff = Abs(element1-element2)<esize-1:0>;
+    Elem[result, e, esize] = Elem[result, e, esize] + absdiff;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Absolute difference and Accumulate. This instruction subtracts the elements of the vector of the second source SIMD&FP register from the corresponding elements of the first source SIMD&FP register, and accumulates the absolute values of the results into the elements of the vector of the destination SIMD&FP register.

+

A64 Instruction

SABA Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vd.2S 
+b → Vn.2S
+c → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+bits(esize) absdiff;
+
+result = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    absdiff = Abs(element1-element2)<esize-1:0>;
+    Elem[result, e, esize] = Elem[result, e, esize] + absdiff;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Absolute difference and Accumulate. This instruction subtracts the elements of the vector of the second source SIMD&FP register from the corresponding elements of the first source SIMD&FP register, and accumulates the absolute values of the results into the elements of the vector of the destination SIMD&FP register.

+

A64 Instruction

SABA Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vd.4S 
+b → Vn.4S
+c → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+bits(esize) absdiff;
+
+result = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    absdiff = Abs(element1-element2)<esize-1:0>;
+    Elem[result, e, esize] = Elem[result, e, esize] + absdiff;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Absolute difference and Accumulate. This instruction subtracts the elements of the vector of the second source SIMD&FP register from the corresponding elements of the first source SIMD&FP register, and accumulates the absolute values of the results into the elements of the vector of the destination SIMD&FP register.

+

A64 Instruction

UABA Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vd.8B 
+b → Vn.8B
+c → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+bits(esize) absdiff;
+
+result = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    absdiff = Abs(element1-element2)<esize-1:0>;
+    Elem[result, e, esize] = Elem[result, e, esize] + absdiff;
+V[d] = result;
+

Supported architectures

v7/A32/A64
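
The unsigned counterpart, sketched on 8-bit lanes for illustration only.

// Scalar model of UABA: acc[i] = acc[i] + |a[i] - b[i]| with wrapping addition.
fn uaba_u8(acc: &mut [u8], a: &[u8], b: &[u8]) {
    for ((d, &x), &y) in acc.iter_mut().zip(a).zip(b) {
        *d = d.wrapping_add(x.abs_diff(y));
    }
}

fn main() {
    let mut acc = [250u8, 1];
    uaba_u8(&mut acc, &[3, 9], &[10, 4]);
    assert_eq!(acc, [1, 6]);
}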

Description

Unsigned Absolute difference and Accumulate. This instruction subtracts the elements of the vector of the second source SIMD&FP register from the corresponding elements of the first source SIMD&FP register, and accumulates the absolute values of the results into the elements of the vector of the destination SIMD&FP register.

+

A64 Instruction

UABA Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vd.16B 
+b → Vn.16B
+c → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+bits(esize) absdiff;
+
+result = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    absdiff = Abs(element1-element2)<esize-1:0>;
+    Elem[result, e, esize] = Elem[result, e, esize] + absdiff;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Absolute difference and Accumulate. This instruction subtracts the elements of the vector of the second source SIMD&FP register from the corresponding elements of the first source SIMD&FP register, and accumulates the absolute values of the results into the elements of the vector of the destination SIMD&FP register.

+

A64 Instruction

UABA Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vd.4H 
+b → Vn.4H
+c → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+bits(esize) absdiff;
+
+result = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    absdiff = Abs(element1-element2)<esize-1:0>;
+    Elem[result, e, esize] = Elem[result, e, esize] + absdiff;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Absolute difference and Accumulate. This instruction subtracts the elements of the vector of the second source SIMD&FP register from the corresponding elements of the first source SIMD&FP register, and accumulates the absolute values of the results into the elements of the vector of the destination SIMD&FP register.

+

A64 Instruction

UABA Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vd.8H 
+b → Vn.8H
+c → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+bits(esize) absdiff;
+
+result = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    absdiff = Abs(element1-element2)<esize-1:0>;
+    Elem[result, e, esize] = Elem[result, e, esize] + absdiff;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Absolute difference and Accumulate. This instruction subtracts the elements of the vector of the second source SIMD&FP register from the corresponding elements of the first source SIMD&FP register, and accumulates the absolute values of the results into the elements of the vector of the destination SIMD&FP register.

+

A64 Instruction

UABA Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vd.2S 
+b → Vn.2S
+c → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+bits(esize) absdiff;
+
+result = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    absdiff = Abs(element1-element2)<esize-1:0>;
+    Elem[result, e, esize] = Elem[result, e, esize] + absdiff;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Absolute difference and Accumulate. This instruction subtracts the elements of the vector of the second source SIMD&FP register from the corresponding elements of the first source SIMD&FP register, and accumulates the absolute values of the results into the elements of the vector of the destination SIMD&FP register.

+

A64 Instruction

UABA Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vd.4S 
+b → Vn.4S
+c → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+bits(esize) absdiff;
+
+result = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    absdiff = Abs(element1-element2)<esize-1:0>;
+    Elem[result, e, esize] = Elem[result, e, esize] + absdiff;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Absolute difference and Accumulate Long. This instruction subtracts the vector elements in the lower or upper half of the second source SIMD&FP register from the corresponding vector elements of the first source SIMD&FP register, and accumulates the absolute values of the results into the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

SABAL Vd.8H,Vn.8B,Vm.8B
+

Argument Preparation

a → Vd.8H 
+b → Vn.8B
+c → Vm.8B

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) absdiff;
+
+result = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    absdiff = Abs(element1-element2)<2*esize-1:0>;
+    Elem[result, e, 2*esize] = Elem[result, e, 2*esize] + absdiff;
+V[d] = result;
+

Supported architectures

v7/A32/A64
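
Combining the widening and accumulating behaviour, a scalar sketch assuming 8-bit signed source lanes and a 16-bit accumulator; not stdarch API.

// Scalar model of SABAL: 16-bit accumulator lanes gain |a[i] - b[i]| from 8-bit lanes.
fn sabal_s8(acc: &mut [i16], a: &[i8], b: &[i8]) {
    for ((d, &x), &y) in acc.iter_mut().zip(a).zip(b) {
        *d = d.wrapping_add((i16::from(x) - i16::from(y)).abs());
    }
}

fn main() {
    let mut acc = [100i16, -1];
    sabal_s8(&mut acc, &[-10, 7], &[10, 2]);
    assert_eq!(acc, [120, 4]);
}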

Description

Signed Absolute difference and Accumulate Long. This instruction subtracts the vector elements in the lower or upper half of the second source SIMD&FP register from the corresponding vector elements of the first source SIMD&FP register, and accumulates the absolute values of the results into the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

SABAL Vd.4S,Vn.4H,Vm.4H
+

Argument Preparation

a → Vd.4S 
+b → Vn.4H
+c → Vm.4H

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) absdiff;
+
+result = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    absdiff = Abs(element1-element2)<2*esize-1:0>;
+    Elem[result, e, 2*esize] = Elem[result, e, 2*esize] + absdiff;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Absolute difference and Accumulate Long. This instruction subtracts the vector elements in the lower or upper half of the second source SIMD&FP register from the corresponding vector elements of the first source SIMD&FP register, and accumulates the absolute values of the results into the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

SABAL Vd.2D,Vn.2S,Vm.2S
+

Argument Preparation

a → Vd.2D 
+b → Vn.2S
+c → Vm.2S

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) absdiff;
+
+result = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    absdiff = Abs(element1-element2)<2*esize-1:0>;
+    Elem[result, e, 2*esize] = Elem[result, e, 2*esize] + absdiff;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Absolute difference and Accumulate Long. This instruction subtracts the vector elements in the lower or upper half of the second source SIMD&FP register from the corresponding vector elements of the first source SIMD&FP register, and accumulates the absolute values of the results into the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UABAL Vd.8H,Vn.8B,Vm.8B
+

Argument Preparation

a → Vd.8H 
+b → Vn.8B
+c → Vm.8B

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) absdiff;
+
+result = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    absdiff = Abs(element1-element2)<2*esize-1:0>;
+    Elem[result, e, 2*esize] = Elem[result, e, 2*esize] + absdiff;
+V[d] = result;
+

Supported architectures

v7/A32/A64
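
And the unsigned widening accumulate, under the same illustrative assumptions.

// Scalar model of UABAL: 16-bit accumulator lanes gain |a[i] - b[i]| from unsigned 8-bit lanes.
fn uabal_u8(acc: &mut [u16], a: &[u8], b: &[u8]) {
    for ((d, &x), &y) in acc.iter_mut().zip(a).zip(b) {
        *d = d.wrapping_add(u16::from(x.abs_diff(y)));
    }
}

fn main() {
    let mut acc = [0u16, 65535];
    uabal_u8(&mut acc, &[200, 1], &[100, 2]);
    assert_eq!(acc, [100, 0]);
}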

Description

Unsigned Absolute difference and Accumulate Long. This instruction subtracts the vector elements in the lower or upper half of the second source SIMD&FP register from the corresponding vector elements of the first source SIMD&FP register, and accumulates the absolute values of the results into the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UABAL Vd.4S,Vn.4H,Vm.4H
+

Argument Preparation

a → Vd.4S 
+b → Vn.4H
+c → Vm.4H

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) absdiff;
+
+result = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    absdiff = Abs(element1-element2)<2*esize-1:0>;
+    Elem[result, e, 2*esize] = Elem[result, e, 2*esize] + absdiff;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Absolute difference and Accumulate Long. This instruction subtracts the vector elements in the lower or upper half of the second source SIMD&FP register from the corresponding vector elements of the first source SIMD&FP register, and accumulates the absolute values of the results into the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UABAL Vd.2D,Vn.2S,Vm.2S
+

Argument Preparation

a → Vd.2D 
+b → Vn.2S
+c → Vm.2S

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) absdiff;
+
+result = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    absdiff = Abs(element1-element2)<2*esize-1:0>;
+    Elem[result, e, 2*esize] = Elem[result, e, 2*esize] + absdiff;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Absolute difference and Accumulate Long. This instruction subtracts the vector elements in the lower or upper half of the second source SIMD&FP register from the corresponding vector elements of the first source SIMD&FP register, and accumulates the absolute values of the results into the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

SABAL2 Vd.8H,Vn.16B,Vm.16B
+

Argument Preparation

a → Vd.8H 
+b → Vn.16B
+c → Vm.16B

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) absdiff;
+
+result = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    absdiff = Abs(element1-element2)<2*esize-1:0>;
+    Elem[result, e, 2*esize] = Elem[result, e, 2*esize] + absdiff;
+V[d] = result;
+

Supported architectures

A64

Description

Signed Absolute difference and Accumulate Long. This instruction subtracts the vector elements in the lower or upper half of the second source SIMD&FP register from the corresponding vector elements of the first source SIMD&FP register, and accumulates the absolute values of the results into the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

SABAL2 Vd.4S,Vn.8H,Vm.8H
+

Argument Preparation

a → Vd.4S 
+b → Vn.8H
+c → Vm.8H

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) absdiff;
+
+result = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    absdiff = Abs(element1-element2)<2*esize-1:0>;
+    Elem[result, e, 2*esize] = Elem[result, e, 2*esize] + absdiff;
+V[d] = result;
+

Supported architectures

A64

Description

Signed Absolute difference and Accumulate Long. This instruction subtracts the vector elements in the lower or upper half of the second source SIMD&FP register from the corresponding vector elements of the first source SIMD&FP register, and accumulates the absolute values of the results into the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

SABAL2 Vd.2D,Vn.4S,Vm.4S
+

Argument Preparation

a → Vd.2D 
+b → Vn.4S
+c → Vm.4S

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) absdiff;
+
+result = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    absdiff = Abs(element1-element2)<2*esize-1:0>;
+    Elem[result, e, 2*esize] = Elem[result, e, 2*esize] + absdiff;
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Absolute difference and Accumulate Long. This instruction subtracts the vector elements in the lower or upper half of the second source SIMD&FP register from the corresponding vector elements of the first source SIMD&FP register, and accumulates the absolute values of the results into the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UABAL2 Vd.8H,Vn.16B,Vm.16B
+

Argument Preparation

a → Vd.8H 
+b → Vn.16B
+c → Vm.16B

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) absdiff;
+
+result = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    absdiff = Abs(element1-element2)<2*esize-1:0>;
+    Elem[result, e, 2*esize] = Elem[result, e, 2*esize] + absdiff;
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Absolute difference and Accumulate Long. This instruction subtracts the vector elements in the lower or upper half of the second source SIMD&FP register from the corresponding vector elements of the first source SIMD&FP register, and accumulates the absolute values of the results into the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UABAL2 Vd.4S,Vn.8H,Vm.8H
+

Argument Preparation

a → Vd.4S 
+b → Vn.8H
+c → Vm.8H

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) absdiff;
+
+result = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    absdiff = Abs(element1-element2)<2*esize-1:0>;
+    Elem[result, e, 2*esize] = Elem[result, e, 2*esize] + absdiff;
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Absolute difference and Accumulate Long. This instruction subtracts the vector elements in the lower or upper half of the second source SIMD&FP register from the corresponding vector elements of the first source SIMD&FP register, and accumulates the absolute values of the results into the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UABAL2 Vd.2D,Vn.4S,Vm.4S
+

Argument Preparation

a → Vd.2D 
+b → Vn.4S
+c → Vm.4S

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) absdiff;
+
+result = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    absdiff = Abs(element1-element2)<2*esize-1:0>;
+    Elem[result, e, 2*esize] = Elem[result, e, 2*esize] + absdiff;
+V[d] = result;
+

Supported architectures

A64

Description

Signed Maximum (vector). This instruction compares corresponding elements in the vectors in the two source SIMD&FP registers, places the larger of each pair of signed integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SMAX Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
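
The max/min family reduces to a per-lane comparison; a scalar sketch on signed 8-bit lanes, with an invented helper name.

// Scalar model of SMAX: per-lane signed maximum.
fn smax_s8(a: &[i8], b: &[i8]) -> Vec<i8> {
    a.iter().zip(b).map(|(&x, &y)| x.max(y)).collect()
}

fn main() {
    assert_eq!(smax_s8(&[-1, 4], &[3, -7]), vec![3, 4]);
}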

Description

Signed Maximum (vector). This instruction compares corresponding elements in the vectors in the two source SIMD&FP registers, places the larger of each pair of signed integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SMAX Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Maximum (vector). This instruction compares corresponding elements in the vectors in the two source SIMD&FP registers, places the larger of each pair of signed integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SMAX Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Maximum (vector). This instruction compares corresponding elements in the vectors in the two source SIMD&FP registers, places the larger of each pair of signed integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SMAX Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Maximum (vector). This instruction compares corresponding elements in the vectors in the two source SIMD&FP registers, places the larger of each pair of signed integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SMAX Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Maximum (vector). This instruction compares corresponding elements in the vectors in the two source SIMD&FP registers, places the larger of each pair of signed integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SMAX Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Maximum (vector). This instruction compares corresponding elements in the vectors in the two source SIMD&FP registers, places the larger of each pair of unsigned integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UMAX Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
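
The unsigned variant differs only in the lane interpretation; sketched on 8-bit unsigned lanes for illustration.

// Scalar model of UMAX: per-lane unsigned maximum.
fn umax_u8(a: &[u8], b: &[u8]) -> Vec<u8> {
    a.iter().zip(b).map(|(&x, &y)| x.max(y)).collect()
}

fn main() {
    assert_eq!(umax_u8(&[0, 200], &[255, 100]), vec![255, 200]);
}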

Description

Unsigned Maximum (vector). This instruction compares corresponding elements in the vectors in the two source SIMD&FP registers, places the larger of each pair of unsigned integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UMAX Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Maximum (vector). This instruction compares corresponding elements in the vectors in the two source SIMD&FP registers, places the larger of each pair of unsigned integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UMAX Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Maximum (vector). This instruction compares corresponding elements in the vectors in the two source SIMD&FP registers, places the larger of each pair of unsigned integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UMAX Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Maximum (vector). This instruction compares corresponding elements in the vectors in the two source SIMD&FP registers, places the larger of each pair of unsigned integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UMAX Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Maximum (vector). This instruction compares corresponding elements in the vectors in the two source SIMD&FP registers, places the larger of each pair of unsigned integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UMAX Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Floating-point Maximum (vector). This instruction compares corresponding vector elements in the two source SIMD&FP registers, places the larger of each of the two floating-point values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMAX Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+
+    if minimum then
+        Elem[result, e, esize] = FPMin(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMax(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
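
A scalar sketch of the floating-point maximum on f32 lanes; note that f32::max does not reproduce the exact NaN and FPCR behaviour of FPMax, so this is only an approximation for illustration.

// Scalar model of FMAX: per-lane floating-point maximum (NaN handling simplified).
fn fmax_f32(a: &[f32], b: &[f32]) -> Vec<f32> {
    a.iter().zip(b).map(|(&x, &y)| x.max(y)).collect()
}

fn main() {
    assert_eq!(fmax_f32(&[1.0, -2.5], &[0.5, 3.0]), vec![1.0, 3.0]);
}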

Description

Floating-point Maximum (vector). This instruction compares corresponding vector elements in the two source SIMD&FP registers, places the larger of each of the two floating-point values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMAX Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+
+    if minimum then
+        Elem[result, e, esize] = FPMin(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMax(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Floating-point Maximum (vector). This instruction compares corresponding vector elements in the two source SIMD&FP registers, places the larger of each of the two floating-point values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMAX Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+
+    if minimum then
+        Elem[result, e, esize] = FPMin(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMax(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Maximum (vector). This instruction compares corresponding vector elements in the two source SIMD&FP registers, places the larger of each of the two floating-point values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMAX Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+
+    if minimum then
+        Elem[result, e, esize] = FPMin(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMax(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Minimum (vector). This instruction compares corresponding elements in the vectors in the two source SIMD&FP registers, places the smaller of each of the two signed integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SMIN Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
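
A short Rust sketch for the SMIN Vd.8B form, assuming the standard NEON intrinsic vmin_s8 in core::arch::aarch64 (transmute is only used to build and inspect the eight signed byte lanes).

#[cfg(target_arch = "aarch64")]
fn smin_8b_example() -> [i8; 8] {
    use core::arch::aarch64::{int8x8_t, vmin_s8};
    unsafe {
        // Lane-wise signed minimum (SMIN Vd.8B,Vn.8B,Vm.8B).
        let a: int8x8_t = core::mem::transmute([-3i8, 5, 0, 127, -128, 1, 2, 3]);
        let b: int8x8_t = core::mem::transmute([2i8, -7, 0, 126, -1, 1, 2, 3]);
        core::mem::transmute(vmin_s8(a, b)) // [-3, -7, 0, 126, -128, 1, 2, 3]
    }
}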

Description

Signed Minimum (vector). This instruction compares corresponding elements in the vectors in the two source SIMD&FP registers, places the smaller of each of the two signed integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SMIN Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Minimum (vector). This instruction compares corresponding elements in the vectors in the two source SIMD&FP registers, places the smaller of each of the two signed integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SMIN Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Minimum (vector). This instruction compares corresponding elements in the vectors in the two source SIMD&FP registers, places the smaller of each of the two signed integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SMIN Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Minimum (vector). This instruction compares corresponding elements in the vectors in the two source SIMD&FP registers, places the smaller of each of the two signed integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SMIN Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Minimum (vector). This instruction compares corresponding elements in the vectors in the two source SIMD&FP registers, places the smaller of each of the two signed integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SMIN Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Minimum (vector). This instruction compares corresponding vector elements in the two source SIMD&FP registers, places the smaller of each of the two unsigned integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UMIN Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
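
The unsigned counterpart follows the same shape; a sketch for the UMIN Vd.8B form, assuming the intrinsic vmin_u8 in core::arch::aarch64.

#[cfg(target_arch = "aarch64")]
fn umin_8b_example() -> [u8; 8] {
    use core::arch::aarch64::{uint8x8_t, vmin_u8};
    unsafe {
        // Lane-wise unsigned minimum (UMIN Vd.8B,Vn.8B,Vm.8B).
        let a: uint8x8_t = core::mem::transmute([3u8, 200, 0, 255, 10, 10, 10, 10]);
        let b: uint8x8_t = core::mem::transmute([9u8, 100, 1, 254, 10, 10, 10, 10]);
        core::mem::transmute(vmin_u8(a, b)) // [3, 100, 0, 254, 10, 10, 10, 10]
    }
}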

Description

Unsigned Minimum (vector). This instruction compares corresponding vector elements in the two source SIMD&FP registers, places the smaller of each of the two unsigned integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UMIN Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Minimum (vector). This instruction compares corresponding vector elements in the two source SIMD&FP registers, places the smaller of each of the two unsigned integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UMIN Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Minimum (vector). This instruction compares corresponding vector elements in the two source SIMD&FP registers, places the smaller of each of the two unsigned integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UMIN Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Minimum (vector). This instruction compares corresponding vector elements in the two source SIMD&FP registers, places the smaller of each of the two unsigned integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UMIN Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Minimum (vector). This instruction compares corresponding vector elements in the two source SIMD&FP registers, places the smaller of each of the two unsigned integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UMIN Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Floating-point minimum (vector). This instruction compares corresponding elements in the vectors in the two source SIMD&FP registers, places the smaller of each of the two floating-point values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMIN Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+
+    if minimum then
+        Elem[result, e, esize] = FPMin(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMax(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
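
A hedged Rust sketch for the FMIN Vd.2S form, assuming the usual NEON intrinsic name vmin_f32 in core::arch::aarch64. Like FMAX, this uses FPMin, so a NaN operand lane yields a NaN result lane.

#[cfg(target_arch = "aarch64")]
fn fmin_2s_example() -> [f32; 2] {
    use core::arch::aarch64::{float32x2_t, vmin_f32};
    unsafe {
        // Lane-wise floating-point minimum (FMIN Vd.2S,Vn.2S,Vm.2S).
        let a: float32x2_t = core::mem::transmute([1.0f32, -0.5]);
        let b: float32x2_t = core::mem::transmute([0.25f32, 2.0]);
        core::mem::transmute(vmin_f32(a, b)) // [0.25, -0.5]
    }
}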

Description

Floating-point minimum (vector). This instruction compares corresponding elements in the vectors in the two source SIMD&FP registers, places the smaller of each of the two floating-point values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMIN Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+
+    if minimum then
+        Elem[result, e, esize] = FPMin(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMax(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Floating-point minimum (vector). This instruction compares corresponding elements in the vectors in the two source SIMD&FP registers, places the smaller of each of the two floating-point values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMIN Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+
+    if minimum then
+        Elem[result, e, esize] = FPMin(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMax(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point minimum (vector). This instruction compares corresponding elements in the vectors in the two source SIMD&FP registers, places the smaller of each of the two floating-point values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMIN Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+
+    if minimum then
+        Elem[result, e, esize] = FPMin(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMax(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Maximum Number (vector). This instruction compares corresponding vector elements in the two source SIMD&FP registers, writes the larger of the two floating-point values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMAXNM Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+
+    if minimum then
+        Elem[result, e, esize] = FPMinNum(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMaxNum(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A32/A64
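
A sketch of the FMAXNM Vd.2S behaviour, assuming the intrinsic name vmaxnm_f32 in core::arch::aarch64. The point of interest is the FPMaxNum rule: a quiet NaN in one operand is ignored and the numeric operand is returned, unlike plain FMAX.

#[cfg(target_arch = "aarch64")]
fn fmaxnm_2s_example() -> [f32; 2] {
    use core::arch::aarch64::{float32x2_t, vmaxnm_f32};
    unsafe {
        // maxNum semantics: NaN in lane 0 of `a` is ignored, 0.5 wins.
        let a: float32x2_t = core::mem::transmute([f32::NAN, 1.0f32]);
        let b: float32x2_t = core::mem::transmute([0.5f32, 2.0]);
        core::mem::transmute(vmaxnm_f32(a, b)) // [0.5, 2.0]
    }
}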

Description

Floating-point Maximum Number (vector). This instruction compares corresponding vector elements in the two source SIMD&FP registers, writes the larger of the two floating-point values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMAXNM Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+
+    if minimum then
+        Elem[result, e, esize] = FPMinNum(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMaxNum(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A32/A64

Description

Floating-point Maximum Number (vector). This instruction compares corresponding vector elements in the two source SIMD&FP registers, writes the larger of the two floating-point values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMAXNM Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+
+    if minimum then
+        Elem[result, e, esize] = FPMinNum(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMaxNum(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Maximum Number (vector). This instruction compares corresponding vector elements in the two source SIMD&FP registers, writes the larger of the two floating-point values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMAXNM Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+
+    if minimum then
+        Elem[result, e, esize] = FPMinNum(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMaxNum(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Minimum Number (vector). This instruction compares corresponding vector elements in the two source SIMD&FP registers, writes the smaller of the two floating-point values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMINNM Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+
+    if minimum then
+        Elem[result, e, esize] = FPMinNum(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMaxNum(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A32/A64
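
The mirrored sketch for FMINNM Vd.2S, again assuming the intrinsic name vminnm_f32 in core::arch::aarch64, and again relying on the minNum rule that drops a quiet NaN in favour of the numeric operand.

#[cfg(target_arch = "aarch64")]
fn fminnm_2s_example() -> [f32; 2] {
    use core::arch::aarch64::{float32x2_t, vminnm_f32};
    unsafe {
        // minNum semantics: NaN in lane 0 of `a` is ignored, 0.5 wins.
        let a: float32x2_t = core::mem::transmute([f32::NAN, 1.0f32]);
        let b: float32x2_t = core::mem::transmute([0.5f32, 2.0]);
        core::mem::transmute(vminnm_f32(a, b)) // [0.5, 1.0]
    }
}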

Description

Floating-point Minimum Number (vector). This instruction compares corresponding vector elements in the two source SIMD&FP registers, writes the smaller of the two floating-point values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMINNM Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+
+    if minimum then
+        Elem[result, e, esize] = FPMinNum(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMaxNum(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A32/A64

Description

Floating-point Minimum Number (vector). This instruction compares corresponding vector elements in the two source SIMD&FP registers, writes the smaller of the two floating-point values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMINNM Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+
+    if minimum then
+        Elem[result, e, esize] = FPMinNum(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMaxNum(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Minimum Number (vector). This instruction compares corresponding vector elements in the two source SIMD&FP registers, writes the smaller of the two floating-point values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMINNM Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+
+    if minimum then
+        Elem[result, e, esize] = FPMinNum(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMaxNum(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Shift Left (register). This instruction takes each signed integer value in the vector of the first source SIMD&FP register, shifts each value by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SSHL Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
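
A Rust sketch for the SSHL Vd.8B form, assuming the intrinsic vshl_s8 in core::arch::aarch64. As in the pseudocode above, the shift amount is the signed low byte of each lane of the second operand, so a positive lane shifts left and a negative lane shifts right.

#[cfg(target_arch = "aarch64")]
fn sshl_8b_example() -> [i8; 8] {
    use core::arch::aarch64::{int8x8_t, vshl_s8};
    unsafe {
        // Per-lane variable shift: +3 and +1 shift left, -2 and -1 shift right
        // (arithmetic shift for the signed variant).
        let a: int8x8_t = core::mem::transmute([1i8, -8, 64, 3, 0, 0, 0, 0]);
        let b: int8x8_t = core::mem::transmute([3i8, 1, -2, -1, 0, 0, 0, 0]);
        core::mem::transmute(vshl_s8(a, b)) // [8, -16, 16, 1, 0, 0, 0, 0]
    }
}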

Description

Signed Shift Left (register). This instruction takes each signed integer value in the vector of the first source SIMD&FP register, shifts each value by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SSHL Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Shift Left (register). This instruction takes each signed integer value in the vector of the first source SIMD&FP register, shifts each value by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SSHL Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Shift Left (register). This instruction takes each signed integer value in the vector of the first source SIMD&FP register, shifts each value by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SSHL Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Shift Left (register). This instruction takes each signed integer value in the vector of the first source SIMD&FP register, shifts each value by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SSHL Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Shift Left (register). This instruction takes each signed integer value in the vector of the first source SIMD&FP register, shifts each value by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SSHL Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Shift Left (register). This instruction takes each signed integer value in the vector of the first source SIMD&FP register, shifts each value by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SSHL Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Shift Left (register). This instruction takes each signed integer value in the vector of the first source SIMD&FP register, shifts each value by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SSHL Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

USHL Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
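
The unsigned form USHL Vd.8B can be sketched the same way, assuming the intrinsic vshl_u8 in core::arch::aarch64 (data lanes are unsigned, shift lanes stay signed). Because this variant does not saturate, a left shift that overflows the element simply keeps the low esize bits.

#[cfg(target_arch = "aarch64")]
fn ushl_8b_example() -> [u8; 8] {
    use core::arch::aarch64::{int8x8_t, uint8x8_t, vshl_u8};
    unsafe {
        // 0x80 << 1 = 0x100 wraps to 0 in an 8-bit lane; -2 shifts 9 right to 2.
        let a: uint8x8_t = core::mem::transmute([1u8, 0x80, 9, 0, 0, 0, 0, 0]);
        let b: int8x8_t = core::mem::transmute([3i8, 1, -2, 0, 0, 0, 0, 0]);
        core::mem::transmute(vshl_u8(a, b)) // [8, 0, 2, 0, 0, 0, 0, 0]
    }
}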

Description

Unsigned Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

USHL Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

USHL Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

USHL Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

USHL Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

USHL Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

USHL Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

USHL Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Shift Left (register). This instruction takes each signed integer value in the vector of the first source SIMD&FP register, shifts each value by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SSHL Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

USHL Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQSHL Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
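
A sketch of the saturating variant SQSHL Vd.8B, assuming the intrinsic vqshl_s8 in core::arch::aarch64. Here an overflowing left shift clamps to the signed range instead of wrapping, and the QC flag in FPSR is set as described in the pseudocode above.

#[cfg(target_arch = "aarch64")]
fn sqshl_8b_example() -> [i8; 8] {
    use core::arch::aarch64::{int8x8_t, vqshl_s8};
    unsafe {
        // 100 << 1 saturates to 127, -100 << 1 saturates to -128,
        // 3 << 2 = 12 fits, and the -1 lane shifts 1 right to 0.
        let a: int8x8_t = core::mem::transmute([100i8, -100, 3, 1, 0, 0, 0, 0]);
        let b: int8x8_t = core::mem::transmute([1i8, 1, 2, -1, 0, 0, 0, 0]);
        core::mem::transmute(vqshl_s8(a, b)) // [127, -128, 12, 0, 0, 0, 0, 0]
    }
}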

Description

Signed saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQSHL Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQSHL Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQSHL Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQSHL Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQSHL Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQSHL Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQSHL Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts the element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQSHL Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
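
For the unsigned form the only differences are the element type and the saturation bound, which a short sketch for 8-bit elements makes concrete (the helper name is invented for illustration, not an actual intrinsic signature); the shift count is still signed, so negative counts shift right:

// Minimal per-lane model of UQSHL (unsigned saturating shift left by register).
fn uqshl_lane_u8(a: u8, b: u8) -> (u8, bool) {
    let shift = i32::from(b as i8);        // signed count from the low byte of `b`
    if shift >= 0 {
        // A u32 holds a u8 shifted left by up to 8 places without losing bits.
        let wide = u32::from(a) << shift.min(8);
        (wide.min(u32::from(u8::MAX)) as u8, wide > u32::from(u8::MAX))
    } else {
        // Unsigned right shift, truncating; it can never saturate.
        ((u32::from(a) >> (-shift).min(31)) as u8, false)
    }
}
// e.g. uqshl_lane_u8(0x90, 1) == (0xFF, true)   (0x120 clamps to 255, QC set)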

Description

Unsigned saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts the element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQSHL Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts the element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQSHL Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts the element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQSHL Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts the element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQSHL Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts the element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQSHL Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts the element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQSHL Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts the element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQSHL Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQSHL Bd,Bn,Bm
+

Argument Preparation

a → Bn 
+b → Bm

Results

Bd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQSHL Hd,Hn,Hm
+

Argument Preparation

a → Hn 
+b → Hm

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQSHL Sd,Sn,Sm
+

Argument Preparation

a → Sn 
+b → Sm

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQSHL Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64
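
The scalar doubleword form follows the same pseudocode with a single 64-bit element; a model of it needs a 128-bit intermediate so the capped left shift cannot lose bits. As before, this is only an illustrative sketch with an invented name, not the stdarch implementation:

// Minimal model of the scalar SQSHL Dd,Dn,Dm form (one 64-bit element).
fn sqshl_scalar_i64(a: i64, b: i64) -> (i64, bool) {
    let shift = i32::from(b as i8);        // signed count from the low byte of `b`
    if shift >= 0 {
        // A left shift of 64 or more saturates every non-zero input, so the
        // count can be capped; an i128 holds `a << 64` exactly.
        let wide = i128::from(a) << shift.min(64);
        let sat = wide > i128::from(i64::MAX) || wide < i128::from(i64::MIN);
        (wide.clamp(i128::from(i64::MIN), i128::from(i64::MAX)) as i64, sat)
    } else {
        // Negative counts shift right, truncating, exactly as in the vector form.
        (a >> (-shift).min(63), false)
    }
}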

Description

Unsigned saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts the element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQSHL Bd,Bn,Bm
+

Argument Preparation

a → Bn 
+b → Bm

Results

Bd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts the element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQSHL Hd,Hn,Hm
+

Argument Preparation

a → Hn 
+b → Hm

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts the element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQSHL Sd,Sn,Sm
+

Argument Preparation

a → Sn 
+b → Sm

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts the element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQSHL Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Rounding Shift Left (register). This instruction takes each signed integer value in the vector of the first source SIMD&FP register, shifts it by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SRSHL Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
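
SRSHL rounds but does not saturate, so the interesting cases are the rounding constant on right shifts and the plain truncation to the element width on left shifts. A per-lane sketch for 16-bit elements (invented helper, illustration only):

// Minimal per-lane model of SRSHL (signed rounding shift left by register).
fn srshl_lane_i16(a: i16, b: i16) -> i16 {
    let shift = i32::from(b as i8);        // signed count from the low byte of `b`
    if shift >= 0 {
        // No saturation: only the low 16 bits of the shifted value are kept.
        if shift >= 16 { 0 } else { (i32::from(a) << shift) as i16 }
    } else {
        let s = -shift;
        if s >= 16 {
            0
        } else {
            // round_const = 1 << (s - 1) rounds the right shift to nearest,
            // with ties rounded towards positive infinity.
            ((i32::from(a) + (1 << (s - 1))) >> s) as i16
        }
    }
}
// e.g. srshl_lane_i16(5, -1)  ==  3   (2.5 rounds up)
//      srshl_lane_i16(-5, -1) == -2   (-2.5 also rounds towards +infinity)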

Description

Signed Rounding Shift Left (register). This instruction takes each signed integer value in the vector of the first source SIMD&FP register, shifts it by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SRSHL Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Rounding Shift Left (register). This instruction takes each signed integer value in the vector of the first source SIMD&FP register, shifts it by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SRSHL Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Rounding Shift Left (register). This instruction takes each signed integer value in the vector of the first source SIMD&FP register, shifts it by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SRSHL Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Rounding Shift Left (register). This instruction takes each signed integer value in the vector of the first source SIMD&FP register, shifts it by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SRSHL Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Rounding Shift Left (register). This instruction takes each signed integer value in the vector of the first source SIMD&FP register, shifts it by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SRSHL Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Rounding Shift Left (register). This instruction takes each signed integer value in the vector of the first source SIMD&FP register, shifts it by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SRSHL Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Rounding Shift Left (register). This instruction takes each signed integer value in the vector of the first source SIMD&FP register, shifts it by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SRSHL Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Rounding Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts the vector element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

URSHL Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
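
The unsigned rounding form behaves the same way, except that right shifts are logical and left shifts simply wrap modulo the element width. A sketch for 32-bit elements (invented helper, illustration only):

// Minimal per-lane model of URSHL (unsigned rounding shift left by register).
fn urshl_lane_u32(a: u32, b: u32) -> u32 {
    let shift = i32::from(b as u8 as i8);  // signed count from the low byte of `b`
    if shift >= 0 {
        // No saturation: bits shifted above bit 31 are discarded.
        if shift >= 32 { 0 } else { a << shift }
    } else {
        let s = -shift;
        if s > 32 {
            0
        } else {
            // Widen to u64 so the rounding constant 1 << (s - 1) always fits.
            ((u64::from(a) + (1u64 << (s - 1))) >> s) as u32
        }
    }
}
// e.g. urshl_lane_u32(7, -2) == 2   ((7 + 2) >> 2, rounded to nearest)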

Description

Unsigned Rounding Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts the vector element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

URSHL Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Rounding Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts the vector element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

URSHL Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Rounding Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts the vector element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

URSHL Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Rounding Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts the vector element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

URSHL Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Rounding Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts the vector element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

URSHL Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Rounding Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts the vector element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

URSHL Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Rounding Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts the vector element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

URSHL Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Rounding Shift Left (register). This instruction takes each signed integer value in the vector of the first source SIMD&FP register, shifts it by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SRSHL Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Rounding Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts the vector element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

URSHL Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Rounding Shift Left (register). This instruction takes each vector element in the first source SIMD&FP register, shifts it by a value from the least significant byte of the corresponding vector element of the second source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQRSHL Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
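
SQRSHL combines both behaviours: the rounding constant is applied on right shifts and the result of a left shift saturates to the signed element range. A per-lane sketch for 8-bit elements (invented helper, illustration only), again returning a stand-in for FPSR.QC:

// Minimal per-lane model of SQRSHL (signed saturating rounding shift left).
fn sqrshl_lane_i8(a: i8, b: i8) -> (i8, bool) {
    let shift = i32::from(b);              // for byte elements, `b` itself is the count
    let wide = if shift >= 0 {
        // A left shift of 8 or more saturates any non-zero input, and an i32
        // holds an i8 shifted left by 8 exactly, so the count can be capped.
        i32::from(a) << shift.min(8)
    } else {
        // Rounding right shift; capping the count at 9 is safe because larger
        // counts also round to zero.
        let s = (-shift).min(9);
        (i32::from(a) + (1 << (s - 1))) >> s
    };
    let sat = wide > i32::from(i8::MAX) || wide < i32::from(i8::MIN);
    (wide.clamp(i32::from(i8::MIN), i32::from(i8::MAX)) as i8, sat)
}
// e.g. sqrshl_lane_i8(100, 1) == (i8::MAX, true)   (200 saturates to 127)
//      sqrshl_lane_i8(3, -1)  == (2, false)        (1.5 rounds up to 2)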

Description

Signed saturating Rounding Shift Left (register). This instruction takes each vector element in the first source SIMD&FP register, shifts it by a value from the least significant byte of the corresponding vector element of the second source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQRSHL Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Rounding Shift Left (register). This instruction takes each vector element in the first source SIMD&FP register, shifts it by a value from the least significant byte of the corresponding vector element of the second source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQRSHL Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Rounding Shift Left (register). This instruction takes each vector element in the first source SIMD&FP register, shifts it by a value from the least significant byte of the corresponding vector element of the second source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQRSHL Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Rounding Shift Left (register). This instruction takes each vector element in the first source SIMD&FP register, shifts it by a value from the least significant byte of the corresponding vector element of the second source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQRSHL Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Rounding Shift Left (register). This instruction takes each vector element in the first source SIMD&FP register, shifts it by a value from the least significant byte of the corresponding vector element of the second source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQRSHL Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Rounding Shift Left (register). This instruction takes each vector element in the first source SIMD&FP register, shifts it by a value from the least significant byte of the corresponding vector element of the second source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQRSHL Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Rounding Shift Left (register). This instruction takes each vector element in the first source SIMD&FP register, shifts it by a value from the least significant byte of the corresponding vector element of the second source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQRSHL Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned saturating Rounding Shift Left (register). This instruction takes each vector element of the first source SIMD&FP register, shifts the vector element by a value from the least significant byte of the corresponding vector element of the second source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQRSHL Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
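
The unsigned counterpart differs only in the saturation bound (the top of the unsigned element range) and the logical right shift. A per-lane sketch for 16-bit elements (invented helper, illustration only):

// Minimal per-lane model of UQRSHL (unsigned saturating rounding shift left).
fn uqrshl_lane_u16(a: u16, b: u16) -> (u16, bool) {
    let shift = i32::from(b as u8 as i8);  // signed count from the low byte of `b`
    let wide = if shift >= 0 {
        // A left shift of 16 or more saturates any non-zero input, and a u32
        // holds a u16 shifted left by 16 exactly, so the count can be capped.
        u32::from(a) << shift.min(16)
    } else {
        // Rounding logical right shift; capping at 17 is safe because larger
        // counts also round to zero.
        let s = (-shift).min(17);
        (u32::from(a) + (1u32 << (s - 1))) >> s
    };
    (wide.min(u32::from(u16::MAX)) as u16, wide > u32::from(u16::MAX))
}
// e.g. uqrshl_lane_u16(0x8000, 1) == (u16::MAX, true)   (0x10000 clamps, QC set)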

Description

Unsigned saturating Rounding Shift Left (register). This instruction takes each vector element of the first source SIMD&FP register, shifts the vector element by a value from the least significant byte of the corresponding vector element of the second source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQRSHL Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned saturating Rounding Shift Left (register). This instruction takes each vector element of the first source SIMD&FP register, shifts the vector element by a value from the least significant byte of the corresponding vector element of the second source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQRSHL Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned saturating Rounding Shift Left (register). This instruction takes each vector element of the first source SIMD&FP register, shifts the vector element by a value from the least significant byte of the corresponding vector element of the second source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQRSHL Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned saturating Rounding Shift Left (register). This instruction takes each vector element of the first source SIMD&FP register, shifts the vector element by a value from the least significant byte of the corresponding vector element of the second source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQRSHL Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned saturating Rounding Shift Left (register). This instruction takes each vector element of the first source SIMD&FP register, shifts the vector element by a value from the least significant byte of the corresponding vector element of the second source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQRSHL Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned saturating Rounding Shift Left (register). This instruction takes each vector element of the first source SIMD&FP register, shifts the vector element by a value from the least significant byte of the corresponding vector element of the second source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQRSHL Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned saturating Rounding Shift Left (register). This instruction takes each vector element of the first source SIMD&FP register, shifts the vector element by a value from the least significant byte of the corresponding vector element of the second source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQRSHL Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Rounding Shift Left (register). This instruction takes each vector element in the first source SIMD&FP register, shifts it by a value from the least significant byte of the corresponding vector element of the second source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQRSHL Bd,Bn,Bm
+

Argument Preparation

a → Bn 
+b → Bm

Results

Bd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64
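
The signed form follows the same pseudocode with signed saturation. Below is a minimal plain-Rust sketch of the signed 8-bit case (the scalar B-register form shown above operates on exactly one such value); the name sqrshl_s8 is hypothetical, and the sketch only mirrors the pseudocode under the assumption that shift amounts past the element width behave like very large shifts.

fn sqrshl_s8(value: i8, shift_byte: i8) -> (i8, bool) {
    let shift = i32::from(shift_byte);
    let widened = i64::from(value);
    let element: i64 = if shift >= 0 {
        // Left shift; the cap only avoids shift overflow in the sketch.
        widened << shift.min(16)
    } else {
        // Rounding right shift by n with rounding constant 2^(n-1).
        let n = (-shift).min(16);
        (widened + (1i64 << (n - 1))) >> n
    };
    // SatQ for the signed 8-bit range; sat models FPSR.QC.
    let sat = element > i64::from(i8::MAX) || element < i64::from(i8::MIN);
    (element.clamp(i64::from(i8::MIN), i64::from(i8::MAX)) as i8, sat)
}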

Description

Signed saturating Rounding Shift Left (register). This instruction takes each vector element in the first source SIMD&FP register, shifts it by a value from the least significant byte of the corresponding vector element of the second source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQRSHL Hd,Hn,Hm
+

Argument Preparation

a → Hn 
+b → Hm

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Rounding Shift Left (register). This instruction takes each vector element in the first source SIMD&FP register, shifts it by a value from the least significant byte of the corresponding vector element of the second source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQRSHL Sd,Sn,Sm
+

Argument Preparation

a → Sn 
+b → Sm

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Rounding Shift Left (register). This instruction takes each vector element in the first source SIMD&FP register, shifts it by a value from the least significant byte of the corresponding vector element of the second source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQRSHL Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned saturating Rounding Shift Left (register). This instruction takes each vector element of the first source SIMD&FP register, shifts the vector element by a value from the least significant byte of the corresponding vector element of the second source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQRSHL Bd,Bn,Bm
+

Argument Preparation

a → Bn 
+b → Bm

Results

Bd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned saturating Rounding Shift Left (register). This instruction takes each vector element of the first source SIMD&FP register, shifts the vector element by a value from the least significant byte of the corresponding vector element of the second source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQRSHL Hd,Hn,Hm
+

Argument Preparation

a → Hn 
+b → Hm

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned saturating Rounding Shift Left (register). This instruction takes each vector element of the first source SIMD&FP register, shifts the vector element by a value from the least significant byte of the corresponding vector element of the second source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQRSHL Sd,Sn,Sm
+

Argument Preparation

a → Sn 
+b → Sm

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned saturating Rounding Shift Left (register). This instruction takes each vector element of the first source SIMD&FP register, shifts the vector element by a value from the least significant byte of the corresponding vector element of the second source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQRSHL Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Shift Right (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, places the final result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values. The results are truncated. For rounded results, see SRSHR.

+

A64 Instruction

SSHR Vd.8B,Vn.8B,#n
+

Argument Preparation

a → Vn.8B 
+1 ≤ n ≤ 8

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
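
Because SSHR neither rounds nor accumulates, the generic pseudocode above reduces to a plain arithmetic right shift per lane. A minimal one-lane sketch in Rust for the signed 8-bit case, with a hypothetical name:

fn sshr_lane_s8(value: i8, n: u32) -> i8 {
    debug_assert!((1..=8u32).contains(&n));
    // No rounding constant is added (compare SRSHR); widening to i16 keeps a
    // shift by the full element width (n = 8) well defined in Rust.
    (i16::from(value) >> n) as i8
}

For example, sshr_lane_s8(-7, 1) is -4: the arithmetic shift rounds toward negative infinity because no rounding constant is added, whereas the rounding SRSHR gives -3 for the same inputs.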

Description

Signed Shift Right (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, places the final result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values. The results are truncated. For rounded results, see SRSHR.

+

A64 Instruction

SSHR Vd.16B,Vn.16B,#n
+

Argument Preparation

a → Vn.16B 
+1 ≤ n ≤ 8

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Shift Right (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, places the final result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values. The results are truncated. For rounded results, see SRSHR.

+

A64 Instruction

SSHR Vd.4H,Vn.4H,#n
+

Argument Preparation

a → Vn.4H 
+1 ≤ n ≤ 16

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Shift Right (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, places the final result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values. The results are truncated. For rounded results, see SRSHR.

+

A64 Instruction

SSHR Vd.8H,Vn.8H,#n
+

Argument Preparation

a → Vn.8H 
+1 ≤ n ≤ 16

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Shift Right (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, places the final result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values. The results are truncated. For rounded results, see SRSHR.

+

A64 Instruction

SSHR Vd.2S,Vn.2S,#n
+

Argument Preparation

a → Vn.2S 
+1 ≤ n ≤ 32

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Shift Right (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, places the final result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values. The results are truncated. For rounded results, see SRSHR.

+

A64 Instruction

SSHR Vd.4S,Vn.4S,#n
+

Argument Preparation

a → Vn.4S 
+1 ≤ n ≤ 32

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Shift Right (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, places the final result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values. The results are truncated. For rounded results, see SRSHR.

+

A64 Instruction

SSHR Dd,Dn,#n
+

Argument Preparation

a → Dn 
+1 ≤ n ≤ 64

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Shift Right (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, places the final result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values. The results are truncated. For rounded results, see SRSHR.

+

A64 Instruction

SSHR Vd.2D,Vn.2D,#n
+

Argument Preparation

a → Vn.2D 
+1 ≤ n ≤ 64

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Shift Right (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are truncated. For rounded results, see URSHR.

+

A64 Instruction

USHR Vd.8B,Vn.8B,#n
+

Argument Preparation

a → Vn.8B 
+1 ≤ n ≤ 8

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
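
The unsigned form is the same reduction with a logical shift. A minimal one-lane Rust sketch of the 8-bit case, with a hypothetical name:

fn ushr_lane_u8(value: u8, n: u32) -> u8 {
    debug_assert!((1..=8u32).contains(&n));
    // Logical right shift with no rounding constant (compare URSHR); widening
    // to u16 keeps a shift by the full element width (n = 8) defined in Rust.
    (u16::from(value) >> n) as u8
}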

Description

Unsigned Shift Right (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are truncated. For rounded results, see URSHR.

+

A64 Instruction

USHR Vd.16B,Vn.16B,#n
+

Argument Preparation

a → Vn.16B 
+1 ≤ n ≤ 8

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Shift Right (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are truncated. For rounded results, see URSHR.

+

A64 Instruction

USHR Vd.4H,Vn.4H,#n
+

Argument Preparation

a → Vn.4H 
+1 ≤ n ≤ 16

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Shift Right (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are truncated. For rounded results, see URSHR.

+

A64 Instruction

USHR Vd.8H,Vn.8H,#n
+

Argument Preparation

a → Vn.8H 
+1 ≤ n ≤ 16

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Shift Right (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are truncated. For rounded results, see URSHR.

+

A64 Instruction

USHR Vd.2S,Vn.2S,#n
+

Argument Preparation

a → Vn.2S 
+1 ≤ n ≤ 32

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Shift Right (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are truncated. For rounded results, see URSHR.

+

A64 Instruction

USHR Vd.4S,Vn.4S,#n
+

Argument Preparation

a → Vn.4S 
+1 ≤ n ≤ 32

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Shift Right (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are truncated. For rounded results, see URSHR.

+

A64 Instruction

USHR Dd,Dn,#n
+

Argument Preparation

a → Dn 
+1 ≤ n ≤ 64

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Shift Right (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are truncated. For rounded results, see URSHR.

+

A64 Instruction

USHR Vd.2D,Vn.2D,#n
+

Argument Preparation

a → Vn.2D 
+1 ≤ n ≤ 64

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Shift Right (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, places the final result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values. The results are truncated. For rounded results, see SRSHR.

+

A64 Instruction

SSHR Dd,Dn,#n
+

Argument Preparation

a → Dn 
+1 ≤ n ≤ 64

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Shift Right (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are truncated. For rounded results, see URSHR.

+

A64 Instruction

USHR Dd,Dn,#n
+

Argument Preparation

a → Dn 
+1 ≤ n ≤ 64

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Shift Left (immediate). This instruction reads each value from a vector, left shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SHL Vd.8B,Vn.8B,#n
+

Argument Preparation

a → Vn.8B 
+0 ≤ n ≤ 7

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = LSL(Elem[operand, e, esize], shift);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
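
The operation above is a per-lane logical shift left in which bits shifted out of the element are discarded. A one-lane sketch in Rust for the 8-bit case, with a hypothetical name:

fn shl_lane_u8(value: u8, n: u32) -> u8 {
    debug_assert!(n <= 7); // immediate range for byte elements
    // LSL: bits shifted past the top of the element are simply dropped.
    value << n
}

For example, shl_lane_u8(0b1100_0001, 2) is 0b0000_0100: the two high bits fall off the top of the element.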

Description

Shift Left (immediate). This instruction reads each value from a vector, left shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SHL Vd.16B,Vn.16B,#n
+

Argument Preparation

a → Vn.16B 
+0 ≤ n ≤ 7

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = LSL(Elem[operand, e, esize], shift);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Left (immediate). This instruction reads each value from a vector, left shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SHL Vd.4H,Vn.4H,#n
+

Argument Preparation

a → Vn.4H 
+0 ≤ n ≤ 15

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = LSL(Elem[operand, e, esize], shift);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Left (immediate). This instruction reads each value from a vector, left shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SHL Vd.8H,Vn.8H,#n
+

Argument Preparation

a → Vn.8H 
+0 ≤ n ≤ 15

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = LSL(Elem[operand, e, esize], shift);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Left (immediate). This instruction reads each value from a vector, left shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SHL Vd.2S,Vn.2S,#n
+

Argument Preparation

a → Vn.2S 
+0 ≤ n ≤ 31

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = LSL(Elem[operand, e, esize], shift);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Left (immediate). This instruction reads each value from a vector, left shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SHL Vd.4S,Vn.4S,#n
+

Argument Preparation

a → Vn.4S 
+0 ≤ n ≤ 31

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = LSL(Elem[operand, e, esize], shift);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Left (immediate). This instruction reads each value from a vector, left shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SHL Dd,Dn,#n
+

Argument Preparation

a → Dn 
+0 ≤ n ≤ 63

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = LSL(Elem[operand, e, esize], shift);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Left (immediate). This instruction reads each value from a vector, left shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SHL Vd.2D,Vn.2D,#n
+

Argument Preparation

a → Vn.2D 
+0 ≤ n ≤ 63

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = LSL(Elem[operand, e, esize], shift);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Left (immediate). This instruction reads each value from a vector, left shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SHL Vd.8B,Vn.8B,#n
+

Argument Preparation

a → Vn.8B 
+0 ≤ n ≤ 7

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = LSL(Elem[operand, e, esize], shift);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Left (immediate). This instruction reads each value from a vector, left shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SHL Vd.16B,Vn.16B,#n
+

Argument Preparation

a → Vn.16B 
+0 ≤ n ≤ 7

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = LSL(Elem[operand, e, esize], shift);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Left (immediate). This instruction reads each value from a vector, left shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SHL Vd.4H,Vn.4H,#n
+

Argument Preparation

a → Vn.4H 
+0 ≤ n ≤ 15

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = LSL(Elem[operand, e, esize], shift);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Left (immediate). This instruction reads each value from a vector, left shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SHL Vd.8H,Vn.8H,#n
+

Argument Preparation

a → Vn.8H 
+0 ≤ n ≤ 15

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = LSL(Elem[operand, e, esize], shift);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Left (immediate). This instruction reads each value from a vector, left shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SHL Vd.2S,Vn.2S,#n
+

Argument Preparation

a → Vn.2S 
+0 ≤ n ≤ 31

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = LSL(Elem[operand, e, esize], shift);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Left (immediate). This instruction reads each value from a vector, left shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SHL Vd.4S,Vn.4S,#n
+

Argument Preparation

a → Vn.4S 
+0 ≤ n ≤ 31

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = LSL(Elem[operand, e, esize], shift);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Left (immediate). This instruction reads each value from a vector, left shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SHL Dd,Dn,#n
+

Argument Preparation

a → Dn 
+0 ≤ n ≤ 63

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = LSL(Elem[operand, e, esize], shift);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Left (immediate). This instruction reads each value from a vector, left shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SHL Vd.2D,Vn.2D,#n
+

Argument Preparation

a → Vn.2D 
+0 ≤ n ≤ 63

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = LSL(Elem[operand, e, esize], shift);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Left (immediate). This instruction reads each value from a vector, left shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SHL Dd,Dn,#n
+

Argument Preparation

a → Dn 
+0 ≤ n ≤ 63

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = LSL(Elem[operand, e, esize], shift);
+
+V[d] = result;
+

Supported architectures

A64

Description

Shift Left (immediate). This instruction reads each value from a vector, left shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SHL Dd,Dn,#n
+

Argument Preparation

a → Dn 
+0 ≤ n ≤ 63

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = LSL(Elem[operand, e, esize], shift);
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Rounding Shift Right (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, places the final result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values. The results are rounded. For truncated results, see SSHR.

+

A64 Instruction

SRSHR Vd.8B,Vn.8B,#n
+

Argument Preparation

a → Vn.8B 
+1 ≤ n ≤ 8

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
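
SRSHR is the rounding counterpart of SSHR: the pseudocode adds 2^(n-1) before the arithmetic shift. A one-lane Rust sketch of the signed 8-bit case, with a hypothetical name:

fn srshr_lane_s8(value: i8, n: u32) -> i8 {
    debug_assert!((1..=8u32).contains(&n));
    // Add the rounding constant 2^(n-1), then shift arithmetically; for this
    // element width the result always fits back into 8 bits.
    ((i32::from(value) + (1i32 << (n - 1))) >> n) as i8
}

For example, srshr_lane_s8(-7, 1) is -3, whereas the truncating SSHR gives -4 for the same inputs.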

Description

Signed Rounding Shift Right (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, places the final result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values. The results are rounded. For truncated results, see SSHR.

+

A64 Instruction

SRSHR Vd.16B,Vn.16B,#n
+

Argument Preparation

a → Vn.16B 
+1 ≤ n ≤ 8

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Rounding Shift Right (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, places the final result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values. The results are rounded. For truncated results, see SSHR.

+

A64 Instruction

SRSHR Vd.4H,Vn.4H,#n
+

Argument Preparation

a → Vn.4H 
+1 ≤ n ≤ 16

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Rounding Shift Right (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, places the final result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values. The results are rounded. For truncated results, see SSHR.

+

A64 Instruction

SRSHR Vd.8H,Vn.8H,#n
+

Argument Preparation

a → Vn.8H 
+1 ≤ n ≤ 16

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Rounding Shift Right (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, places the final result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values. The results are rounded. For truncated results, see SSHR.

+

A64 Instruction

SRSHR Vd.2S,Vn.2S,#n
+

Argument Preparation

a → Vn.2S 
+1 ≤ n ≤ 32

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Rounding Shift Right (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, places the final result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values. The results are rounded. For truncated results, see SSHR.

+

A64 Instruction

SRSHR Vd.4S,Vn.4S,#n
+

Argument Preparation

a → Vn.4S 
+1 ≤ n ≤ 32

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Rounding Shift Right (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, places the final result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values. The results are rounded. For truncated results, see SSHR.

+

A64 Instruction

SRSHR Dd,Dn,#n
+

Argument Preparation

a → Dn 
+1 ≤ n ≤ 64

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Rounding Shift Right (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, places the final result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values. The results are rounded. For truncated results, see SSHR.

+

A64 Instruction

SRSHR Vd.2D,Vn.2D,#n
+

Argument Preparation

a → Vn.2D 
+1 ≤ n ≤ 64

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Rounding Shift Right (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are rounded. For truncated results, see USHR.

+

A64 Instruction

URSHR Vd.8B,Vn.8B,#n
+

Argument Preparation

a → Vn.8B 
+1 ≤ n ≤ 8

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
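
URSHR rounds the same way for unsigned lanes. A one-lane Rust sketch of the 8-bit case, with a hypothetical name:

fn urshr_lane_u8(value: u8, n: u32) -> u8 {
    debug_assert!((1..=8u32).contains(&n));
    // Add 2^(n-1) before the logical shift; widening avoids any overflow of
    // the intermediate sum.
    ((u32::from(value) + (1u32 << (n - 1))) >> n) as u8
}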

Description

Unsigned Rounding Shift Right (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are rounded. For truncated results, see USHR.

+

A64 Instruction

URSHR Vd.16B,Vn.16B,#n
+

Argument Preparation

a → Vn.16B 
+1 ≤ n ≤ 8

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Rounding Shift Right (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are rounded. For truncated results, see USHR.

+

A64 Instruction

URSHR Vd.4H,Vn.4H,#n
+

Argument Preparation

a → Vn.4H 
+1 ≤ n ≤ 16

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Rounding Shift Right (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are rounded. For truncated results, see USHR.

+

A64 Instruction

URSHR Vd.8H,Vn.8H,#n
+

Argument Preparation

a → Vn.8H 
+1 ≤ n ≤ 16

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Rounding Shift Right (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are rounded. For truncated results, see USHR.

+

A64 Instruction

URSHR Vd.2S,Vn.2S,#n
+

Argument Preparation

a → Vn.2S 
+1 ≤ n ≤ 32

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Rounding Shift Right (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are rounded. For truncated results, see USHR.

+

A64 Instruction

URSHR Vd.4S,Vn.4S,#n
+

Argument Preparation

a → Vn.4S 
+1 ≤ n ≤ 32

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Rounding Shift Right (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are rounded. For truncated results, see USHR.

+

A64 Instruction

URSHR Dd,Dn,#n
+

Argument Preparation

a → Dn 
+1 ≤ n ≤ 64

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Rounding Shift Right (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are rounded. For truncated results, see USHR.

+

A64 Instruction

URSHR Vd.2D,Vn.2D,#n
+

Argument Preparation

a → Vn.2D 
+1 ≤ n ≤ 64

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Rounding Shift Right (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, places the final result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values. The results are rounded. For truncated results, see SSHR.

+

A64 Instruction

SRSHR Dd,Dn,#n
+

Argument Preparation

a → Dn 
+1 ≤ n ≤ 64

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64
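
A minimal scalar Rust sketch of the signed rounding shift right described above, for the 64-bit doubleword form (names are illustrative, not the intrinsic): the value is widened so that adding the rounding constant cannot overflow, and the arithmetic shift preserves the sign.

/// Scalar model of SRSHR on a signed 64-bit value: rounding arithmetic
/// shift right by n (1 <= n <= 64). Sketch only.
fn srshr_i64(x: i64, n: u32) -> i64 {
    assert!(n >= 1 && n <= 64);
    let round_const: i128 = 1i128 << (n - 1);
    (((x as i128) + round_const) >> n) as i64
}

For example, srshr_i64(-5, 2) yields -1 (-5/4 = -1.25 rounded to the nearest integer), while the truncating shift used by SSHR yields -2.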

Description

Unsigned Rounding Shift Right (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are rounded. For truncated results, see USHR.

+

A64 Instruction

URSHR Dd,Dn,#n
+

Argument Preparation

a → Dn 
+1 ≤ n ≤ 64

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&FP register. All the values in this instruction are signed integer values. The results are truncated. For rounded results, see SRSRA.

+

A64 Instruction

SSRA Vd.8B,Vn.8B,#n
+

Argument Preparation

a → Vd.8B 
+b → Vn.8B
+1 ≤ n ≤ 8

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
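
A minimal scalar Rust sketch of one signed 8-bit SSRA lane as described above (helper name and lane width are illustrative): the source lane is arithmetically shifted right with truncation, then added to the destination lane with wrap-around, matching the element<esize-1:0> addition in the pseudocode.

/// Scalar model of SSRA on one signed 8-bit lane: truncating arithmetic
/// shift right of the source by n (1 <= n <= 8), then a wrapping add
/// into the accumulator lane. Sketch only.
fn ssra_i8(acc: i8, x: i8, n: u32) -> i8 {
    assert!(n >= 1 && n <= 8);
    let shifted = ((x as i32) >> n) as i8; // fits: |x >> n| <= 64 for n >= 1
    acc.wrapping_add(shifted)
}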

Description

Signed Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&FP register. All the values in this instruction are signed integer values. The results are truncated. For rounded results, see SRSRA.

+

A64 Instruction

SSRA Vd.16B,Vn.16B,#n
+

Argument Preparation

a → Vd.16B 
+b → Vn.16B
+1 ≤ n ≤ 8

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&FP register. All the values in this instruction are signed integer values. The results are truncated. For rounded results, see SRSRA.

+

A64 Instruction

SSRA Vd.4H,Vn.4H,#n
+

Argument Preparation

a → Vd.4H 
+b → Vn.4H
+1 ≤ n ≤ 16

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&FP register. All the values in this instruction are signed integer values. The results are truncated. For rounded results, see SRSRA.

+

A64 Instruction

SSRA Vd.8H,Vn.8H,#n
+

Argument Preparation

a → Vd.8H 
+b → Vn.8H
+1 ≤ n ≤ 16

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&FP register. All the values in this instruction are signed integer values. The results are truncated. For rounded results, see SRSRA.

+

A64 Instruction

SSRA Vd.2S,Vn.2S,#n
+

Argument Preparation

a → Vd.2S 
+b → Vn.2S
+1 ≤ n ≤ 32

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&FP register. All the values in this instruction are signed integer values. The results are truncated. For rounded results, see SRSRA.

+

A64 Instruction

SSRA Vd.4S,Vn.4S,#n
+

Argument Preparation

a → Vd.4S 
+b → Vn.4S
+1 ≤ n ≤ 32

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&FP register. All the values in this instruction are signed integer values. The results are truncated. For rounded results, see SRSRA.

+

A64 Instruction

SSRA Dd,Dn,#n
+

Argument Preparation

a → Dd 
+b → Dn
+1 ≤ n ≤ 64

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&FP register. All the values in this instruction are signed integer values. The results are truncated. For rounded results, see SRSRA.

+

A64 Instruction

SSRA Vd.2D,Vn.2D,#n
+

Argument Preparation

a → Vd.2D 
+b → Vn.2D
+1 ≤ n ≤ 64

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are truncated. For rounded results, see URSRA.

+

A64 Instruction

USRA Vd.8B,Vn.8B,#n
+

Argument Preparation

a → Vd.8B 
+b → Vn.8B
+1 ≤ n ≤ 8

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
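
The unsigned form follows the same pattern; here is a minimal scalar Rust sketch of one unsigned 8-bit USRA lane (illustrative only): a truncating logical right shift followed by a wrapping add into the accumulator.

/// Scalar model of USRA on one unsigned 8-bit lane: truncating logical
/// shift right by n (1 <= n <= 8), then a wrapping add into the
/// accumulator lane. Sketch only.
fn usra_u8(acc: u8, x: u8, n: u32) -> u8 {
    assert!(n >= 1 && n <= 8);
    acc.wrapping_add(((x as u32) >> n) as u8)
}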

Description

Unsigned Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are truncated. For rounded results, see URSRA.

+

A64 Instruction

USRA Vd.16B,Vn.16B,#n
+

Argument Preparation

a → Vd.16B 
+b → Vn.16B
+1 ≤ n ≤ 8

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are truncated. For rounded results, see URSRA.

+

A64 Instruction

USRA Vd.4H,Vn.4H,#n
+

Argument Preparation

a → Vd.4H 
+b → Vn.4H
+1 ≤ n ≤ 16

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are truncated. For rounded results, see URSRA.

+

A64 Instruction

USRA Vd.8H,Vn.8H,#n
+

Argument Preparation

a → Vd.8H 
+b → Vn.8H
+1 ≤ n ≤ 16

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are truncated. For rounded results, see URSRA.

+

A64 Instruction

USRA Vd.2S,Vn.2S,#n
+

Argument Preparation

a → Vd.2S 
+b → Vn.2S
+1 ≤ n ≤ 32

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are truncated. For rounded results, see URSRA.

+

A64 Instruction

USRA Vd.4S,Vn.4S,#n
+

Argument Preparation

a → Vd.4S 
+b → Vn.4S
+1 ≤ n ≤ 32

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are truncated. For rounded results, see URSRA.

+

A64 Instruction

USRA Dd,Dn,#n
+

Argument Preparation

a → Dd 
+b → Dn
+1 ≤ n ≤ 64

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are truncated. For rounded results, see URSRA.

+

A64 Instruction

USRA Vd.2D,Vn.2D,#n
+

Argument Preparation

a → Vd.2D 
+b → Vn.2D
+1 ≤ n ≤ 64

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&FP register. All the values in this instruction are signed integer values. The results are truncated. For rounded results, see SRSRA.

+

A64 Instruction

SSRA Dd,Dn,#n
+

Argument Preparation

a → Dd 
+b → Dn
+1 ≤ n ≤ 64

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are truncated. For rounded results, see URSRA.

+

A64 Instruction

USRA Dd,Dn,#n
+

Argument Preparation

a → Dd 
+b → Dn
+1 ≤ n ≤ 64

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Rounding Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&FP register. All the values in this instruction are signed integer values. The results are rounded. For truncated results, see SSRA.

+

A64 Instruction

SRSRA Vd.8B,Vn.8B,#n
+

Argument Preparation

a → Vd.8B 
+b → Vn.8B
+1 ≤ n ≤ 8

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
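
A minimal scalar Rust sketch of one signed 8-bit SRSRA lane as described above (illustrative only): SRSRA combines the rounding right shift of SRSHR with the wrapping accumulate of SSRA.

/// Scalar model of SRSRA on one signed 8-bit lane: add the rounding
/// constant 2^(n-1), arithmetically shift right by n (1 <= n <= 8),
/// then wrapping-add into the accumulator lane. Sketch only.
fn srsra_i8(acc: i8, x: i8, n: u32) -> i8 {
    assert!(n >= 1 && n <= 8);
    let round_const: i32 = 1 << (n - 1);
    let shifted = (((x as i32) + round_const) >> n) as i8; // fits: |result| <= 64
    acc.wrapping_add(shifted)
}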

Description

Signed Rounding Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&FP register. All the values in this instruction are signed integer values. The results are rounded. For truncated results, see SSRA.

+

A64 Instruction

SRSRA Vd.16B,Vn.16B,#n
+

Argument Preparation

a → Vd.16B 
+b → Vn.16B
+1 ≤ n ≤ 8

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Rounding Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&FP register. All the values in this instruction are signed integer values. The results are rounded. For truncated results, see SSRA.

+

A64 Instruction

SRSRA Vd.4H,Vn.4H,#n
+

Argument Preparation

a → Vd.4H 
+b → Vn.4H
+1 ≤ n ≤ 16

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Rounding Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&FP register. All the values in this instruction are signed integer values. The results are rounded. For truncated results, see SSRA.

+

A64 Instruction

SRSRA Vd.8H,Vn.8H,#n
+

Argument Preparation

a → Vd.8H 
+b → Vn.8H
+1 ≤ n ≤ 16

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Rounding Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&FP register. All the values in this instruction are signed integer values. The results are rounded. For truncated results, see SSRA.

+

A64 Instruction

SRSRA Vd.2S,Vn.2S,#n
+

Argument Preparation

a → Vd.2S 
+b → Vn.2S
+1 ≤ n ≤ 32

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Rounding Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&FP register. All the values in this instruction are signed integer values. The results are rounded. For truncated results, see SSRA.

+

A64 Instruction

SRSRA Vd.4S,Vn.4S,#n
+

Argument Preparation

a → Vd.4S 
+b → Vn.4S
+1 ≤ n ≤ 32

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Rounding Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&FP register. All the values in this instruction are signed integer values. The results are rounded. For truncated results, see SSRA.

+

A64 Instruction

SRSRA Dd,Dn,#n
+

Argument Preparation

a → Dd 
+b → Dn
+1 ≤ n ≤ 64

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Rounding Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&FP register. All the values in this instruction are signed integer values. The results are rounded. For truncated results, see SSRA.

+

A64 Instruction

SRSRA Vd.2D,Vn.2D,#n
+

Argument Preparation

a → Vd.2D 
+b → Vn.2D
+1 ≤ n ≤ 64

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Rounding Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are rounded. For truncated results, see USRA.

+

A64 Instruction

URSRA Vd.8B,Vn.8B,#n
+

Argument Preparation

a → Vd.8B 
+b → Vn.8B
+1 ≤ n ≤ 8

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
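
A minimal scalar Rust sketch of one unsigned 8-bit URSRA lane (illustrative only): the rounding logical right shift of URSHR followed by the wrapping accumulate of USRA.

/// Scalar model of URSRA on one unsigned 8-bit lane: add the rounding
/// constant 2^(n-1), logically shift right by n (1 <= n <= 8), then
/// wrapping-add into the accumulator lane. Sketch only.
fn ursra_u8(acc: u8, x: u8, n: u32) -> u8 {
    assert!(n >= 1 && n <= 8);
    let round_const: u32 = 1 << (n - 1);
    let shifted = (((x as u32) + round_const) >> n) as u8; // fits: result <= 191
    acc.wrapping_add(shifted)
}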

Description

Unsigned Rounding Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are rounded. For truncated results, see USRA.

+

A64 Instruction

URSRA Vd.16B,Vn.16B,#n
+

Argument Preparation

a → Vd.16B 
+b → Vn.16B
+1 ≤ n ≤ 8

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Rounding Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are rounded. For truncated results, see USRA.

+

A64 Instruction

URSRA Vd.4H,Vn.4H,#n
+

Argument Preparation

a → Vd.4H 
+b → Vn.4H
+1 ≤ n ≤ 16

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Rounding Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are rounded. For truncated results, see USRA.

+

A64 Instruction

URSRA Vd.8H,Vn.8H,#n
+

Argument Preparation

a → Vd.8H 
+b → Vn.8H
+1 ≤ n ≤ 16

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Rounding Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are rounded. For truncated results, see USRA.

+

A64 Instruction

URSRA Vd.2S,Vn.2S,#n
+

Argument Preparation

a → Vd.2S 
+b → Vn.2S
+1 ≤ n ≤ 32

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Rounding Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are rounded. For truncated results, see USRA.

+

A64 Instruction

URSRA Vd.4S,Vn.4S,#n
+

Argument Preparation

a → Vd.4S 
+b → Vn.4S
+1 ≤ n ≤ 32

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Rounding Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are rounded. For truncated results, see USRA.

+

A64 Instruction

URSRA Dd,Dn,#n
+

Argument Preparation

a → Dd 
+b → Dn
+1 ≤ n ≤ 64

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Rounding Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are rounded. For truncated results, see USRA.

+

A64 Instruction

URSRA Vd.2D,Vn.2D,#n
+

Argument Preparation

a → Vd.2D 
+b → Vn.2D
+1 ≤ n ≤ 64

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Rounding Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&FP register. All the values in this instruction are signed integer values. The results are rounded. For truncated results, see SSRA.

+

A64 Instruction

SRSRA Dd,Dn,#n
+

Argument Preparation

a → Dd 
+b → Dn
+1 ≤ n ≤ 64

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Rounding Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are rounded. For truncated results, see USRA.

+

A64 Instruction

URSRA Dd,Dn,#n
+

Argument Preparation

a → Dd 
+b → Dn
+1 ≤ n ≤ 64

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2;
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+operand2 = if accumulate then V[d] else Zeros();
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, esize], unsigned) + round_const) >> shift;
+    Elem[result, e, esize] = Elem[operand2, e, esize] + element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQSHL Vd.8B,Vn.8B,#n
+

Argument Preparation

a → Vn.8B 
+0 ≤ n ≤ 7

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
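
As a rough illustration, here is a scalar Rust sketch of a signed saturating left shift on one 8-bit lane, modelling the immediate form shown in the A64 Instruction line above (shift amount 0 ≤ n ≤ 7; names are illustrative, not the intrinsic): the result is computed exactly in a wider type and clamped to the i8 range, which is the point at which the hardware would also set the FPSR.QC sticky saturation flag.

/// Scalar model of a signed saturating left shift of one 8-bit lane by an
/// immediate n (0 <= n <= 7). Sketch only; does not model FPSR.QC.
fn sqshl_i8(x: i8, n: u32) -> i8 {
    assert!(n <= 7);
    let wide = (x as i32) << n;                      // exact, no overflow in i32
    wide.clamp(i8::MIN as i32, i8::MAX as i32) as i8 // saturate on overflow
}

For example, sqshl_i8(100, 1) saturates to 127 instead of wrapping to -56.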

Description

Signed saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQSHL Vd.16B,Vn.16B,#n
+

Argument Preparation

a → Vn.16B 
+0 ≤ n ≤ 7

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQSHL Vd.4H,Vn.4H,#n
+

Argument Preparation

a → Vn.4H 
+0 ≤ n ≤ 15

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQSHL Vd.8H,Vn.8H,#n
+

Argument Preparation

a → Vn.8H 
+0 ≤ n ≤ 15

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQSHL Vd.2S,Vn.2S,#n
+

Argument Preparation

a → Vn.2S 
+0 ≤ n ≤ 31

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQSHL Vd.4S,Vn.4S,#n
+

Argument Preparation

a → Vn.4S 
+0 ≤ n ≤ 31

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQSHL Dd,Dn,#n
+

Argument Preparation

a → Dn 
+0 ≤ n ≤ 63

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQSHL Vd.2D,Vn.2D,#n
+

Argument Preparation

a → Vn.2D 
+0 ≤ n ≤ 63

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts the element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQSHL Vd.8B,Vn.8B,#n
+

Argument Preparation

a → Vn.8B 
+0 ≤ n ≤ 7

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
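
The unsigned counterpart, again sketched in scalar Rust for the immediate form from the A64 Instruction line above (illustrative only): the shifted value is clamped to u8::MAX on overflow rather than wrapping.

/// Scalar model of an unsigned saturating left shift of one 8-bit lane by
/// an immediate n (0 <= n <= 7). Sketch only; does not model FPSR.QC.
fn uqshl_u8(x: u8, n: u32) -> u8 {
    assert!(n <= 7);
    let wide = (x as u32) << n;
    if wide > u8::MAX as u32 { u8::MAX } else { wide as u8 }
}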

Description

Unsigned saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts the element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQSHL Vd.16B,Vn.16B,#n
+

Argument Preparation

a → Vn.16B 
+0 ≤ n ≤ 7

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts the element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQSHL Vd.4H,Vn.4H,#n
+

Argument Preparation

a → Vn.4H 
+0 ≤ n ≤ 15

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts the element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQSHL Vd.8H,Vn.8H,#n
+

Argument Preparation

a → Vn.8H 
+0 ≤ n ≤ 15

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts the element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQSHL Vd.2S,Vn.2S,#n
+

Argument Preparation

a → Vn.2S 
+0 ≤ n ≤ 31

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts the element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQSHL Vd.4S,Vn.4S,#n
+

Argument Preparation

a → Vn.4S 
+0 <= n <= 31

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts the element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQSHL Dd,Dn,#n
+

Argument Preparation

a → Dn 
+0 <= n <= 63

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts the element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQSHL Vd.2D,Vn.2D,#n
+

Argument Preparation

a → Vn.2D 
+0 <= n <= 63

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQSHL Bd,Bn,#n
+

Argument Preparation

a → Bn 
+0 <= n <= 7

Results

Bd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64
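
A per-element sketch in Rust of the signed saturating left shift by an immediate, matching the Bd/Bn (8-bit) form (illustrative only, not the core_arch implementation):

// Widen, shift, then clamp into the signed 8-bit range.
fn sqshl_imm_i8(x: i8, n: u32) -> i8 {
    assert!(n <= 7);
    let wide = (x as i32) << n;      // -128 << 7 = -16384 still fits comfortably in i32
    if wide > i8::MAX as i32 {
        i8::MAX                      // positive overflow saturates to 127 (sets FPSR.QC)
    } else if wide < i8::MIN as i32 {
        i8::MIN                      // negative overflow saturates to -128 (sets FPSR.QC)
    } else {
        wide as i8
    }
}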

Description

Signed saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQSHL Hd,Hn,#n
+

Argument Preparation

a → Hn 
+0 <= n <= 15

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQSHL Sd,Sn,#n
+

Argument Preparation

a → Sn 
+0 <= n <= 31

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQSHL Dd,Dn,#n
+

Argument Preparation

a → Dn 
+0 <= n <= 63

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts the element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQSHL Bd,Bn,#n
+

Argument Preparation

a → Bn 
+0 <= n <= 7

Results

Bd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64
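
The Operation pseudocode repeated in these UQSHL/SQSHL entries is the general register form: the shift amount is read as a signed byte, a negative value means a right shift, and rounding adds 2^(n-1) before a right shift (the immediate encodings simply fix that byte). A rough Rust model of one unsigned lane, restricted to shift magnitudes below the element size to keep the sketch simple (the names, types, and plain booleans for the decode-time flags are illustrative assumptions):

fn qshl_lane_u8(value: u8, shift: i8, rounding: bool, saturating: bool) -> u8 {
    assert!(shift > -8 && shift < 8);     // sketch only covers |shift| < element size
    let round_const: i32 = if rounding && shift < 0 {
        1 << (-shift - 1)                 // 2^(n-1) for a right shift by n
    } else {
        0                                 // 0 for a left shift
    };
    let x = value as i32 + round_const;
    let shifted = if shift >= 0 { x << shift } else { x >> -shift };
    if saturating && shifted > u8::MAX as i32 {
        u8::MAX                           // saturated; FPSR.QC would be set
    } else {
        shifted as u8                     // otherwise keep element<7:0>
    }
}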

Description

Unsigned saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts the element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQSHL Hd,Hn,#n
+

Argument Preparation

a → Hn 
+0 <= n <= 15

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts the element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQSHL Sd,Sn,#n
+

Argument Preparation

a → Sn 
+0 <= n <= 31

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts the element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UQSHL Dd,Dn,#n
+

Argument Preparation

a → Dn 
+0 <= n <= 63

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer round_const = 0;
+integer shift;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    shift = SInt(Elem[operand2, e, esize]<7:0>);
+    if rounding then
+        round_const = 1 << (-shift - 1);    // 0 for left shift, 2^(n-1) for right shift 
+    element = (Int(Elem[operand1, e, esize], unsigned) + round_const) << shift;
+    if saturating then
+        (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+        if sat then FPSR.QC = '1';
+    else
+        Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Shift Left Unsigned (immediate). This instruction reads each signed integer value in the vector of the source SIMD&FP register, shifts each value by an immediate value, saturates the shifted result to an unsigned integer value, places the result in a vector, and writes the vector to the destination SIMD&FP register. The results are truncated. For rounded results, see UQRSHL.

+

A64 Instruction

SQSHLU Vd.8B,Vn.8B,#n
+

Argument Preparation

a → Vn.8B 
+0 <= n <= 7

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = Int(Elem[operand, e, esize], src_unsigned) << shift;
+    (Elem[result, e, esize], sat) = SatQ(element, esize, dst_unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
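
A per-lane sketch in Rust of SQSHLU's combination of a signed input with unsigned saturation, using i8 -> u8 to match the 8B form (illustrative only):

fn sqshlu_imm_i8(x: i8, n: u32) -> u8 {
    assert!(n <= 7);
    let wide = (x as i32) << n;   // widen first so the shift itself cannot overflow
    if wide < 0 {
        0                         // negative inputs saturate to zero (sets FPSR.QC)
    } else if wide > u8::MAX as i32 {
        u8::MAX                   // too large for 8 unsigned bits (sets FPSR.QC)
    } else {
        wide as u8
    }
}

For example, sqshlu_imm_i8(20, 3) returns 160, while sqshlu_imm_i8(-1, 3) returns 0 and would set QC.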

Description

Signed saturating Shift Left Unsigned (immediate). This instruction reads each signed integer value in the vector of the source SIMD&FP register, shifts each value by an immediate value, saturates the shifted result to an unsigned integer value, places the result in a vector, and writes the vector to the destination SIMD&FP register. The results are truncated. For rounded results, see UQRSHL.

+

A64 Instruction

SQSHLU Vd.16B,Vn.16B,#n
+

Argument Preparation

a → Vn.16B 
+0 <= n <= 7

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = Int(Elem[operand, e, esize], src_unsigned) << shift;
+    (Elem[result, e, esize], sat) = SatQ(element, esize, dst_unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Shift Left Unsigned (immediate). This instruction reads each signed integer value in the vector of the source SIMD&FP register, shifts each value by an immediate value, saturates the shifted result to an unsigned integer value, places the result in a vector, and writes the vector to the destination SIMD&FP register. The results are truncated. For rounded results, see UQRSHL.

+

A64 Instruction

SQSHLU Vd.4H,Vn.4H,#n
+

Argument Preparation

a → Vn.4H 
+0 <= n <= 15

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = Int(Elem[operand, e, esize], src_unsigned) << shift;
+    (Elem[result, e, esize], sat) = SatQ(element, esize, dst_unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Shift Left Unsigned (immediate). This instruction reads each signed integer value in the vector of the source SIMD&FP register, shifts each value by an immediate value, saturates the shifted result to an unsigned integer value, places the result in a vector, and writes the vector to the destination SIMD&FP register. The results are truncated. For rounded results, see UQRSHL.

+

A64 Instruction

SQSHLU Vd.8H,Vn.8H,#n
+

Argument Preparation

a → Vn.8H 
+0 <= n <= 15

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = Int(Elem[operand, e, esize], src_unsigned) << shift;
+    (Elem[result, e, esize], sat) = SatQ(element, esize, dst_unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Shift Left Unsigned (immediate). This instruction reads each signed integer value in the vector of the source SIMD&FP register, shifts each value by an immediate value, saturates the shifted result to an unsigned integer value, places the result in a vector, and writes the vector to the destination SIMD&FP register. The results are truncated. For rounded results, see UQRSHL.

+

A64 Instruction

SQSHLU Vd.2S,Vn.2S,#n
+

Argument Preparation

a → Vn.2S 
+0 <= n <= 31

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = Int(Elem[operand, e, esize], src_unsigned) << shift;
+    (Elem[result, e, esize], sat) = SatQ(element, esize, dst_unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Shift Left Unsigned (immediate). This instruction reads each signed integer value in the vector of the source SIMD&FP register, shifts each value by an immediate value, saturates the shifted result to an unsigned integer value, places the result in a vector, and writes the vector to the destination SIMD&FP register. The results are truncated. For rounded results, see UQRSHL.

+

A64 Instruction

SQSHLU Vd.4S,Vn.4S,#n
+

Argument Preparation

a → Vn.4S 
+0 <= n <= 31

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = Int(Elem[operand, e, esize], src_unsigned) << shift;
+    (Elem[result, e, esize], sat) = SatQ(element, esize, dst_unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Shift Left Unsigned (immediate). This instruction reads each signed integer value in the vector of the source SIMD&FP register, shifts each value by an immediate value, saturates the shifted result to an unsigned integer value, places the result in a vector, and writes the vector to the destination SIMD&FP register. The results are truncated. For rounded results, see UQRSHL.

+

A64 Instruction

SQSHLU Dd,Dn,#n
+

Argument Preparation

a → Dn 
+0 <= n <= 63

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = Int(Elem[operand, e, esize], src_unsigned) << shift;
+    (Elem[result, e, esize], sat) = SatQ(element, esize, dst_unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Shift Left Unsigned (immediate). This instruction reads each signed integer value in the vector of the source SIMD&FP register, shifts each value by an immediate value, saturates the shifted result to an unsigned integer value, places the result in a vector, and writes the vector to the destination SIMD&FP register. The results are truncated. For rounded results, see UQRSHL.

+

A64 Instruction

SQSHLU Vd.2D,Vn.2D,#n
+

Argument Preparation

a → Vn.2D 
+0 <= n <= 63

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = Int(Elem[operand, e, esize], src_unsigned) << shift;
+    (Elem[result, e, esize], sat) = SatQ(element, esize, dst_unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Shift Left Unsigned (immediate). This instruction reads each signed integer value in the vector of the source SIMD&FP register, shifts each value by an immediate value, saturates the shifted result to an unsigned integer value, places the result in a vector, and writes the vector to the destination SIMD&FP register. The results are truncated. For rounded results, see UQRSHL.

+

A64 Instruction

SQSHLU Bd,Bn,#n
+

Argument Preparation

a → Bn 
+0 <= n <= 7

Results

Bd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = Int(Elem[operand, e, esize], src_unsigned) << shift;
+    (Elem[result, e, esize], sat) = SatQ(element, esize, dst_unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Shift Left Unsigned (immediate). This instruction reads each signed integer value in the vector of the source SIMD&FP register, shifts each value by an immediate value, saturates the shifted result to an unsigned integer value, places the result in a vector, and writes the vector to the destination SIMD&FP register. The results are truncated. For rounded results, see UQRSHL.

+

A64 Instruction

SQSHLU Hd,Hn,#n
+

Argument Preparation

a → Hn 
+0 <= n <= 15

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = Int(Elem[operand, e, esize], src_unsigned) << shift;
+    (Elem[result, e, esize], sat) = SatQ(element, esize, dst_unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Shift Left Unsigned (immediate). This instruction reads each signed integer value in the vector of the source SIMD&FP register, shifts each value by an immediate value, saturates the shifted result to an unsigned integer value, places the result in a vector, and writes the vector to the destination SIMD&FP register. The results are truncated. For rounded results, see UQRSHL.

+

A64 Instruction

SQSHLU Sd,Sn,#n
+

Argument Preparation

a → Sn 
+0 <= n <= 31

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = Int(Elem[operand, e, esize], src_unsigned) << shift;
+    (Elem[result, e, esize], sat) = SatQ(element, esize, dst_unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Shift Left Unsigned (immediate). This instruction reads each signed integer value in the vector of the source SIMD&FP register, shifts each value by an immediate value, saturates the shifted result to an unsigned integer value, places the result in a vector, and writes the vector to the destination SIMD&FP register. The results are truncated. For rounded results, see UQRSHL.

+

A64 Instruction

SQSHLU Dd,Dn,#n
+

Argument Preparation

a → Dn 
+0 <= n <= 63

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = Int(Elem[operand, e, esize], src_unsigned) << shift;
+    (Elem[result, e, esize], sat) = SatQ(element, esize, dst_unsigned);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Shift Right Narrow (immediate). This instruction reads each unsigned integer value from the source SIMD&FP register, right shifts each result by an immediate value, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements. The results are truncated. For rounded results, see RSHRN.

+

A64 Instruction

SHRN Vd.8B,Vn.8H,#n
+

Argument Preparation

a → Vn.8H 
+1 <= n <= 8

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+for e = 0 to elements-1
+    element = (UInt(Elem[operand, e, 2*esize]) + round_const) >> shift;
+    Elem[result, e, esize] = element<esize-1:0>;
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64
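
A per-lane sketch in Rust of the truncating narrowing right shift, with a u16 source lane and a u8 result to match the 8H -> 8B form (illustrative only):

fn shrn_lane_u16(x: u16, n: u32) -> u8 {
    assert!((1..=8).contains(&n));
    (x >> n) as u8   // the cast keeps element<7:0>, discarding any higher bits
}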

Description

Shift Right Narrow (immediate). This instruction reads each unsigned integer value from the source SIMD&FP register, right shifts each result by an immediate value, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements. The results are truncated. For rounded results, see RSHRN.

+

A64 Instruction

SHRN Vd.4H,Vn.4S,#n
+

Argument Preparation

a → Vn.4S 
+1 <= n <= 16

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+for e = 0 to elements-1
+    element = (UInt(Elem[operand, e, 2*esize]) + round_const) >> shift;
+    Elem[result, e, esize] = element<esize-1:0>;
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Right Narrow (immediate). This instruction reads each unsigned integer value from the source SIMD&FP register, right shifts each result by an immediate value, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements. The results are truncated. For rounded results, see RSHRN.

+

A64 Instruction

SHRN Vd.2S,Vn.2D,#n
+

Argument Preparation

a → Vn.2D 
+1 <= n <= 32

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+for e = 0 to elements-1
+    element = (UInt(Elem[operand, e, 2*esize]) + round_const) >> shift;
+    Elem[result, e, esize] = element<esize-1:0>;
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Right Narrow (immediate). This instruction reads each unsigned integer value from the source SIMD&FP register, right shifts each result by an immediate value, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements. The results are truncated. For rounded results, see RSHRN.

+

A64 Instruction

SHRN Vd.8B,Vn.8H,#n
+

Argument Preparation

a → Vn.8H 
+1 <= n <= 8

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+for e = 0 to elements-1
+    element = (UInt(Elem[operand, e, 2*esize]) + round_const) >> shift;
+    Elem[result, e, esize] = element<esize-1:0>;
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Right Narrow (immediate). This instruction reads each unsigned integer value from the source SIMD&FP register, right shifts each result by an immediate value, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements. The results are truncated. For rounded results, see RSHRN.

+

A64 Instruction

SHRN Vd.4H,Vn.4S,#n
+

Argument Preparation

a → Vn.4S 
+1 <= n <= 16

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+for e = 0 to elements-1
+    element = (UInt(Elem[operand, e, 2*esize]) + round_const) >> shift;
+    Elem[result, e, esize] = element<esize-1:0>;
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Right Narrow (immediate). This instruction reads each unsigned integer value from the source SIMD&FP register, right shifts each result by an immediate value, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements. The results are truncated. For rounded results, see RSHRN.

+

A64 Instruction

SHRN Vd.2S,Vn.2D,#n
+

Argument Preparation

a → Vn.2D 
+1 <= n <= 32

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+for e = 0 to elements-1
+    element = (UInt(Elem[operand, e, 2*esize]) + round_const) >> shift;
+    Elem[result, e, esize] = element<esize-1:0>;
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Right Narrow (immediate). This instruction reads each unsigned integer value from the source SIMD&FP register, right shifts each result by an immediate value, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements. The results are truncated. For rounded results, see RSHRN.

+

A64 Instruction

SHRN2 Vd.16B,Vn.8H,#n
+

Argument Preparation

r → Vd.8B 
+a → Vn.8H
+1 <= n <= 8

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+for e = 0 to elements-1
+    element = (UInt(Elem[operand, e, 2*esize]) + round_const) >> shift;
+    Elem[result, e, esize] = element<esize-1:0>;
+
+Vpart[d, part] = result;
+

Supported architectures

A64
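
The "2" (high) variants differ only in where the result goes: the narrowed lanes are written to the upper half of the destination while the lower half keeps its previous contents, which is what Vpart[d, part] expresses. A rough Rust model of that behaviour on plain arrays (illustrative only, not the real register file):

fn shrn2_u16(low_half: [u8; 8], src: [u16; 8], n: u32) -> [u8; 16] {
    assert!((1..=8).contains(&n));
    let mut out = [0u8; 16];
    out[..8].copy_from_slice(&low_half);   // existing low 64 bits are preserved
    for (i, &x) in src.iter().enumerate() {
        out[8 + i] = (x >> n) as u8;       // narrowed results fill the upper half
    }
    out
}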

Description

Shift Right Narrow (immediate). This instruction reads each unsigned integer value from the source SIMD&FP register, right shifts each result by an immediate value, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements. The results are truncated. For rounded results, see RSHRN.

+

A64 Instruction

SHRN2 Vd.8H,Vn.4S,#n
+

Argument Preparation

r → Vd.4H 
+a → Vn.4S
+1 <= n <= 16

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+for e = 0 to elements-1
+    element = (UInt(Elem[operand, e, 2*esize]) + round_const) >> shift;
+    Elem[result, e, esize] = element<esize-1:0>;
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Shift Right Narrow (immediate). This instruction reads each unsigned integer value from the source SIMD&FP register, right shifts each result by an immediate value, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements. The results are truncated. For rounded results, see RSHRN.

+

A64 Instruction

SHRN2 Vd.4S,Vn.2D,#n
+

Argument Preparation

r → Vd.2S 
+a → Vn.2D
+1 <= n <= 32

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+for e = 0 to elements-1
+    element = (UInt(Elem[operand, e, 2*esize]) + round_const) >> shift;
+    Elem[result, e, esize] = element<esize-1:0>;
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Shift Right Narrow (immediate). This instruction reads each unsigned integer value from the source SIMD&FP register, right shifts each result by an immediate value, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements. The results are truncated. For rounded results, see RSHRN.

+

A64 Instruction

SHRN2 Vd.16B,Vn.8H,#n
+

Argument Preparation

r → Vd.8B 
+a → Vn.8H
+1 <= n <= 8

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+for e = 0 to elements-1
+    element = (UInt(Elem[operand, e, 2*esize]) + round_const) >> shift;
+    Elem[result, e, esize] = element<esize-1:0>;
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Shift Right Narrow (immediate). This instruction reads each unsigned integer value from the source SIMD&FP register, right shifts each result by an immediate value, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements. The results are truncated. For rounded results, see RSHRN.

+

A64 Instruction

SHRN2 Vd.8H,Vn.4S,#n
+

Argument Preparation

r → Vd.4H 
+a → Vn.4S
+1 <= n <= 16

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+for e = 0 to elements-1
+    element = (UInt(Elem[operand, e, 2*esize]) + round_const) >> shift;
+    Elem[result, e, esize] = element<esize-1:0>;
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Shift Right Narrow (immediate). This instruction reads each unsigned integer value from the source SIMD&FP register, right shifts each result by an immediate value, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements. The results are truncated. For rounded results, see RSHRN.

+

A64 Instruction

SHRN2 Vd.4S,Vn.2D,#n
+

Argument Preparation

r → Vd.2S 
+a → Vn.2D
+1 <= n <= 32

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+for e = 0 to elements-1
+    element = (UInt(Elem[operand, e, 2*esize]) + round_const) >> shift;
+    Elem[result, e, esize] = element<esize-1:0>;
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Signed saturating Shift Right Unsigned Narrow (immediate). This instruction reads each signed integer value in the vector of the source SIMD&FP register, right shifts each value by an immediate value, saturates the result to an unsigned integer value that is half the original width, places the final result into a vector, and writes the vector to the destination SIMD&FP register. The results are truncated. For rounded results, see SQRSHRUN.

+

A64 Instruction

SQSHRUN Vd.8B,Vn.8H,#n
+

Argument Preparation

a → Vn.8H 
+1 <= n <= 8

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (SInt(Elem[operand, e, 2*esize]) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = UnsignedSatQ(element, esize);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64
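
A per-lane sketch in Rust of SQSHRUN: a signed source lane is right-shifted, then saturated into the unsigned range of half the width (i16 -> u8 to match the 8H -> 8B form; illustrative only):

fn sqshrun_lane_i16(x: i16, n: u32) -> u8 {
    assert!((1..=8).contains(&n));
    let shifted = (x as i32) >> n;   // arithmetic shift keeps the sign
    if shifted < 0 {
        0                            // negative results clamp to 0 (sets FPSR.QC)
    } else if shifted > u8::MAX as i32 {
        u8::MAX                      // too large for 8 unsigned bits (sets FPSR.QC)
    } else {
        shifted as u8
    }
}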

Description

Signed saturating Shift Right Unsigned Narrow (immediate). This instruction reads each signed integer value in the vector of the source SIMD&FP register, right shifts each value by an immediate value, saturates the result to an unsigned integer value that is half the original width, places the final result into a vector, and writes the vector to the destination SIMD&FP register. The results are truncated. For rounded results, see SQRSHRUN.

+

A64 Instruction

SQSHRUN Vd.4H,Vn.4S,#n
+

Argument Preparation

a → Vn.4S 
+1 <= n <= 16

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (SInt(Elem[operand, e, 2*esize]) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = UnsignedSatQ(element, esize);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Shift Right Unsigned Narrow (immediate). This instruction reads each signed integer value in the vector of the source SIMD&FP register, right shifts each value by an immediate value, saturates the result to an unsigned integer value that is half the original width, places the final result into a vector, and writes the vector to the destination SIMD&FP register. The results are truncated. For rounded results, see SQRSHRUN.

+

A64 Instruction

SQSHRUN Vd.2S,Vn.2D,#n
+

Argument Preparation

a → Vn.2D 
+1 <= n <= 32

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (SInt(Elem[operand, e, 2*esize]) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = UnsignedSatQ(element, esize);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Shift Right Unsigned Narrow (immediate). This instruction reads each signed integer value in the vector of the source SIMD&FP register, right shifts each value by an immediate value, saturates the result to an unsigned integer value that is half the original width, places the final result into a vector, and writes the vector to the destination SIMD&FP register. The results are truncated. For rounded results, see SQRSHRUN.

+

A64 Instruction

SQSHRUN Bd,Hn,#n
+

Argument Preparation

a → Hn 
+1 <= n <= 8

Results

Bd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (SInt(Elem[operand, e, 2*esize]) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = UnsignedSatQ(element, esize);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Signed saturating Shift Right Unsigned Narrow (immediate). This instruction reads each signed integer value in the vector of the source SIMD&FP register, right shifts each value by an immediate value, saturates the result to an unsigned integer value that is half the original width, places the final result into a vector, and writes the vector to the destination SIMD&FP register. The results are truncated. For rounded results, see SQRSHRUN.

+

A64 Instruction

SQSHRUN Hd,Sn,#n
+

Argument Preparation

a → Sn 
+1 <= n <= 16

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (SInt(Elem[operand, e, 2*esize]) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = UnsignedSatQ(element, esize);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Signed saturating Shift Right Unsigned Narrow (immediate). This instruction reads each signed integer value in the vector of the source SIMD&FP register, right shifts each value by an immediate value, saturates the result to an unsigned integer value that is half the original width, places the final result into a vector, and writes the vector to the destination SIMD&FP register. The results are truncated. For rounded results, see SQRSHRUN.

+

A64 Instruction

SQSHRUN Sd,Dn,#n
+

Argument Preparation

a → Dn 
+1 <= n <= 32

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (SInt(Elem[operand, e, 2*esize]) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = UnsignedSatQ(element, esize);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Signed saturating Shift Right Unsigned Narrow (immediate). This instruction reads each signed integer value in the vector of the source SIMD&FP register, right shifts each value by an immediate value, saturates the result to an unsigned integer value that is half the original width, places the final result into a vector, and writes the vector to the destination SIMD&FP register. The results are truncated. For rounded results, see SQRSHRUN.

+

A64 Instruction

SQSHRUN2 Vd.16B,Vn.8H,#n
+

Argument Preparation

r → Vd.8B 
+a → Vn.8H
+1 <= n <= 8

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (SInt(Elem[operand, e, 2*esize]) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = UnsignedSatQ(element, esize);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Signed saturating Shift Right Unsigned Narrow (immediate). This instruction reads each signed integer value in the vector of the source SIMD&FP register, right shifts each value by an immediate value, saturates the result to an unsigned integer value that is half the original width, places the final result into a vector, and writes the vector to the destination SIMD&FP register. The results are truncated. For rounded results, see SQRSHRUN.

+

A64 Instruction

SQSHRUN2 Vd.8H,Vn.4S,#n
+

Argument Preparation

r → Vd.4H 
+a → Vn.4S
+1 <= n <= 16

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (SInt(Elem[operand, e, 2*esize]) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = UnsignedSatQ(element, esize);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Signed saturating Shift Right Unsigned Narrow (immediate). This instruction reads each signed integer value in the vector of the source SIMD&FP register, right shifts each value by an immediate value, saturates the result to an unsigned integer value that is half the original width, places the final result into a vector, and writes the vector to the destination SIMD&FP register. The results are truncated. For rounded results, see SQRSHRUN.

+

A64 Instruction

SQSHRUN2 Vd.4S,Vn.2D,#n
+

Argument Preparation

r → Vd.2S 
+a → Vn.2D
+1 <= n <= 32

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (SInt(Elem[operand, e, 2*esize]) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = UnsignedSatQ(element, esize);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Signed saturating Rounded Shift Right Unsigned Narrow (immediate). This instruction reads each signed integer value in the vector of the source SIMD&FP register, right shifts each value by an immediate value, saturates the result to an unsigned integer value that is half the original width, places the final result into a vector, and writes the vector to the destination SIMD&FP register. The results are rounded. For truncated results, see SQSHRUN.

+

A64 Instruction

SQRSHRUN Vd.8B,Vn.8H,#n
+

Argument Preparation

a → Vn.8H 
+1 <= n <= 8

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (SInt(Elem[operand, e, 2*esize]) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = UnsignedSatQ(element, esize);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64
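
The rounded variant differs from SQSHRUN only in adding 2^(n-1) before the shift, so the bits shifted out round the result to nearest instead of truncating it. A per-lane sketch in Rust (illustrative only):

fn sqrshrun_lane_i16(x: i16, n: u32) -> u8 {
    assert!((1..=8).contains(&n));
    let round_const = 1i32 << (n - 1);         // half of the weight being shifted out
    let shifted = (x as i32 + round_const) >> n;
    shifted.max(0).min(u8::MAX as i32) as u8   // clamp into 0..=255
}

For example, with n = 4 an input of 56 yields (56 + 8) >> 4 = 4, where the truncating SQSHRUN would give 3.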

Description

Signed saturating Rounded Shift Right Unsigned Narrow (immediate). This instruction reads each signed integer value in the vector of the source SIMD&FP register, right shifts each value by an immediate value, saturates the result to an unsigned integer value that is half the original width, places the final result into a vector, and writes the vector to the destination SIMD&FP register. The results are rounded. For truncated results, see SQSHRUN.

+

A64 Instruction

SQRSHRUN Vd.4H,Vn.4S,#n
+

Argument Preparation

a → Vn.4S 
+1 <= n <= 16

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (SInt(Elem[operand, e, 2*esize]) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = UnsignedSatQ(element, esize);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Rounded Shift Right Unsigned Narrow (immediate). This instruction reads each signed integer value in the vector of the source SIMD&FP register, right shifts each value by an immediate value, saturates the result to an unsigned integer value that is half the original width, places the final result into a vector, and writes the vector to the destination SIMD&FP register. The results are rounded. For truncated results, see SQSHRUN.

+

A64 Instruction

SQRSHRUN Vd.2S,Vn.2D,#n
+

Argument Preparation

a → Vn.2D 
+1 <= n <= 32

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (SInt(Elem[operand, e, 2*esize]) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = UnsignedSatQ(element, esize);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Rounded Shift Right Unsigned Narrow (immediate). This instruction reads each signed integer value in the vector of the source SIMD&FP register, right shifts each value by an immediate value, saturates the result to an unsigned integer value that is half the original width, places the final result into a vector, and writes the vector to the destination SIMD&FP register. The results are rounded. For truncated results, see SQSHRUN.

+

A64 Instruction

SQRSHRUN Bd,Hn,#n
+

Argument Preparation

a → Hn 
+1 <= n <= 8

Results

Bd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (SInt(Elem[operand, e, 2*esize]) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = UnsignedSatQ(element, esize);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Signed saturating Rounded Shift Right Unsigned Narrow (immediate). This instruction reads each signed integer value in the vector of the source SIMD&FP register, right shifts each value by an immediate value, saturates the result to an unsigned integer value that is half the original width, places the final result into a vector, and writes the vector to the destination SIMD&FP register. The results are rounded. For truncated results, see SQSHRUN.

+

A64 Instruction

SQRSHRUN Hd,Sn,#n
+

Argument Preparation

a → Sn 
+1 <= n <= 16

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (SInt(Elem[operand, e, 2*esize]) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = UnsignedSatQ(element, esize);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Signed saturating Rounded Shift Right Unsigned Narrow (immediate). This instruction reads each signed integer value in the vector of the source SIMD&FP register, right shifts each value by an immediate value, saturates the result to an unsigned integer value that is half the original width, places the final result into a vector, and writes the vector to the destination SIMD&FP register. The results are rounded. For truncated results, see SQSHRUN.

+

A64 Instruction

SQRSHRUN Sd,Dn,#n
+

Argument Preparation

a → Dn 
+1 <= n <= 32

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (SInt(Elem[operand, e, 2*esize]) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = UnsignedSatQ(element, esize);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Signed saturating Rounded Shift Right Unsigned Narrow (immediate). This instruction reads each signed integer value in the vector of the source SIMD&FP register, right shifts each value by an immediate value, saturates the result to an unsigned integer value that is half the original width, places the final result into a vector, and writes the vector to the destination SIMD&FP register. The results are rounded. For truncated results, see SQSHRUN.

+

A64 Instruction

SQRSHRUN2 Vd.16B,Vn.8H,#n
+

Argument Preparation

r → Vd.8B 
+a → Vn.8H
+1 <= n <= 8

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (SInt(Elem[operand, e, 2*esize]) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = UnsignedSatQ(element, esize);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Signed saturating Rounded Shift Right Unsigned Narrow (immediate). This instruction reads each signed integer value in the vector of the source SIMD&FP register, right shifts each value by an immediate value, saturates the result to an unsigned integer value that is half the original width, places the final result into a vector, and writes the vector to the destination SIMD&FP register. The results are rounded. For truncated results, see SQSHRUN.

+

A64 Instruction

SQRSHRUN2 Vd.8H,Vn.4S,#n
+

Argument Preparation

r → Vd.4H 
+a → Vn.4S
+1 <= n <= 16

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (SInt(Elem[operand, e, 2*esize]) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = UnsignedSatQ(element, esize);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Signed saturating Rounded Shift Right Unsigned Narrow (immediate). This instruction reads each signed integer value in the vector of the source SIMD&FP register, right shifts each value by an immediate value, saturates the result to an unsigned integer value that is half the original width, places the final result into a vector, and writes the vector to the destination SIMD&FP register. The results are rounded. For truncated results, see SQSHRUN.

+

A64 Instruction

SQRSHRUN2 Vd.4S,Vn.2D,#n
+

Argument Preparation

r → Vd.2S 
+a → Vn.2D
+1 <= n <= 32

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (SInt(Elem[operand, e, 2*esize]) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = UnsignedSatQ(element, esize);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Signed saturating Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts and truncates each result by an immediate value, saturates each shifted result to a value that is half the original width, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are signed integer values. The destination vector elements are half as long as the source vector elements. For rounded results, see SQRSHRN.

+

A64 Instruction

SQSHRN Vd.8B,Vn.8H,#n
+

Argument Preparation

a → Vn.8H 
+1 <= n <= 8

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, 2*esize], unsigned) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64
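
A per-lane sketch in Rust of SQSHRN, where both the source and the saturated, narrowed result are signed (i16 -> i8 to match the 8H -> 8B form; illustrative only):

fn sqshrn_lane_i16(x: i16, n: u32) -> i8 {
    assert!((1..=8).contains(&n));
    let shifted = (x as i32) >> n;
    if shifted > i8::MAX as i32 {
        i8::MAX                      // saturate at +127 (sets FPSR.QC)
    } else if shifted < i8::MIN as i32 {
        i8::MIN                      // saturate at -128 (sets FPSR.QC)
    } else {
        shifted as i8
    }
}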

Description

Signed saturating Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts and truncates each result by an immediate value, saturates each shifted result to a value that is half the original width, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are signed integer values. The destination vector elements are half as long as the source vector elements. For rounded results, see SQRSHRN.

+

A64 Instruction

SQSHRN Vd.4H,Vn.4S,#n
+

Argument Preparation

a → Vn.4S 
+1 <= n <= 16

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, 2*esize], unsigned) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts and truncates each result by an immediate value, saturates each shifted result to a value that is half the original width, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are signed integer values. The destination vector elements are half as long as the source vector elements. For rounded results, see SQRSHRN.

+

A64 Instruction

SQSHRN Vd.2S,Vn.2D,#n
+

Argument Preparation

a → Vn.2D 
+1 << n << 32

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, 2*esize], unsigned) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned saturating Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, saturates each shifted result to a value that is half the original width, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are truncated. For rounded results, see UQRSHRN.

+

A64 Instruction

UQSHRN Vd.8B,Vn.8H,#n
+

Argument Preparation

a → Vn.8H 
+1 << n << 8

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, 2*esize], unsigned) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64
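
The unsigned variant differs only in the shift and the saturation bounds; a matching scalar sketch (name invented, not the stdarch implementation):

fn uqshrn_lane(x: u16, n: u32) -> u8 {
    // logical shift, then saturate to the unsigned 8-bit range
    (u32::from(x) >> n).min(u32::from(u8::MAX)) as u8
}

For example, uqshrn_lane(0x0FFF, 2) shifts to 1023 and saturates to 255.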

Description

Unsigned saturating Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, saturates each shifted result to a value that is half the original width, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are truncated. For rounded results, see UQRSHRN.

+

A64 Instruction

UQSHRN Vd.4H,Vn.4S,#n
+

Argument Preparation

a → Vn.4S 
+1 << n << 16

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, 2*esize], unsigned) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned saturating Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, saturates each shifted result to a value that is half the original width, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are truncated. For rounded results, see UQRSHRN.

+

A64 Instruction

UQSHRN Vd.2S,Vn.2D,#n
+

Argument Preparation

a → Vn.2D 
+1 << n << 32

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, 2*esize], unsigned) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts and truncates each result by an immediate value, saturates each shifted result to a value that is half the original width, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are signed integer values. The destination vector elements are half as long as the source vector elements. For rounded results, see SQRSHRN.

+

A64 Instruction

SQSHRN Bd,Hn,#n
+

Argument Preparation

a → Hn 
+1 << n << 8

Results

Bd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, 2*esize], unsigned) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Signed saturating Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts and truncates each result by an immediate value, saturates each shifted result to a value that is half the original width, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are signed integer values. The destination vector elements are half as long as the source vector elements. For rounded results, see SQRSHRN.

+

A64 Instruction

SQSHRN Hd,Sn,#n
+

Argument Preparation

a → Sn 
+1 << n << 16

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, 2*esize], unsigned) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Signed saturating Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts and truncates each result by an immediate value, saturates each shifted result to a value that is half the original width, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are signed integer values. The destination vector elements are half as long as the source vector elements. For rounded results, see SQRSHRN.

+

A64 Instruction

SQSHRN Sd,Dn,#n
+

Argument Preparation

a → Dn 
+1 << n << 32

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, 2*esize], unsigned) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Unsigned saturating Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, saturates each shifted result to a value that is half the original width, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are truncated. For rounded results, see UQRSHRN.

+

A64 Instruction

UQSHRN Bd,Hn,#n
+

Argument Preparation

a → Hn 
+1 << n << 8

Results

Bd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, 2*esize], unsigned) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Unsigned saturating Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, saturates each shifted result to a value that is half the original width, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are truncated. For rounded results, see UQRSHRN.

+

A64 Instruction

UQSHRN Hd,Sn,#n
+

Argument Preparation

a → Sn 
+1 << n << 16

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, 2*esize], unsigned) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Unsigned saturating Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, saturates each shifted result to a value that is half the original width, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are truncated. For rounded results, see UQRSHRN.

+

A64 Instruction

UQSHRN Sd,Dn,#n
+

Argument Preparation

a → Dn 
+1 << n << 32

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, 2*esize], unsigned) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Signed saturating Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts and truncates each result by an immediate value, saturates each shifted result to a value that is half the original width, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are signed integer values. The destination vector elements are half as long as the source vector elements. For rounded results, see SQRSHRN.

+

A64 Instruction

SQSHRN2 Vd.16B,Vn.8H,#n
+

Argument Preparation

r → Vd.8B 
+a → Vn.8H
+1 << n << 8

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, 2*esize], unsigned) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64
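
The SQSHRN2/UQSHRN2 forms only differ in where the narrowed lanes land: the lower half of Vd is preserved and the new lanes fill the upper half, as Vpart[d, part] in the pseudocode indicates. A Rust sketch of that packing for the 16B destination form (function shape and lane widths chosen for the example):

fn sqshrn2(lower: [i8; 8], src: [i16; 8], n: u32) -> [i8; 16] {
    let mut out = [0i8; 16];
    // the lower eight lanes of the destination are kept as-is
    out[..8].copy_from_slice(&lower);
    // the narrowed, saturated lanes are written to the upper half
    for (e, &x) in src.iter().enumerate() {
        out[8 + e] = (i32::from(x) >> n).clamp(i32::from(i8::MIN), i32::from(i8::MAX)) as i8;
    }
    out
}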

Description

Signed saturating Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts and truncates each result by an immediate value, saturates each shifted result to a value that is half the original width, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are signed integer values. The destination vector elements are half as long as the source vector elements. For rounded results, see SQRSHRN.

+

A64 Instruction

SQSHRN2 Vd.8H,Vn.4S,#n
+

Argument Preparation

r → Vd.4H 
+a → Vn.4S
+1 << n << 16

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, 2*esize], unsigned) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Signed saturating Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts and truncates each result by an immediate value, saturates each shifted result to a value that is half the original width, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are signed integer values. The destination vector elements are half as long as the source vector elements. For rounded results, see SQRSHRN.

+

A64 Instruction

SQSHRN2 Vd.4S,Vn.2D,#n
+

Argument Preparation

r → Vd.2S 
+a → Vn.2D
+1 << n << 32

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, 2*esize], unsigned) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Unsigned saturating Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, saturates each shifted result to a value that is half the original width, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are truncated. For rounded results, see UQRSHRN.

+

A64 Instruction

UQSHRN2 Vd.16B,Vn.8H,#n
+

Argument Preparation

r → Vd.8B 
+a → Vn.8H
+1 << n << 8

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, 2*esize], unsigned) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Unsigned saturating Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, saturates each shifted result to a value that is half the original width, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are truncated. For rounded results, see UQRSHRN.

+

A64 Instruction

UQSHRN2 Vd.8H,Vn.4S,#n
+

Argument Preparation

r → Vd.4H 
+a → Vn.4S
+1 << n << 16

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, 2*esize], unsigned) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Unsigned saturating Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, saturates each shifted result to a value that is half the original width, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are truncated. For rounded results, see UQRSHRN.

+

A64 Instruction

UQSHRN2 Vd.4S,Vn.2D,#n
+

Argument Preparation

r → Vd.2S 
+a → Vn.2D
+1 << n << 32

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, 2*esize], unsigned) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Rounding Shift Right Narrow (immediate). This instruction reads each unsigned integer value from the vector in the source SIMD&FP register, right shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements. The results are rounded. For truncated results, see SHRN.

+

A64 Instruction

RSHRN Vd.8B,Vn.8H,#n
+

Argument Preparation

a → Vn.8H 
+1 << n << 8

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+for e = 0 to elements-1
+    element = (UInt(Elem[operand, e, 2*esize]) + round_const) >> shift;
+    Elem[result, e, esize] = element<esize-1:0>;
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64
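
Unlike the saturating forms above, RSHRN simply keeps the low half of the rounded, shifted value, so out-of-range results wrap. A scalar Rust sketch for a 16-bit lane (helper name invented):

fn rshrn_lane(x: u16, n: u32) -> u8 {
    // rounded shift, then keep only the low 8 bits; no saturation, no QC flag
    (((u32::from(x) + (1 << (n - 1))) >> n) & 0xFF) as u8
}

For example, rshrn_lane(511, 1) rounds up to 256 and wraps to 0, where UQRSHRN would have saturated to 255.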

Description

Rounding Shift Right Narrow (immediate). This instruction reads each unsigned integer value from the vector in the source SIMD&FP register, right shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements. The results are rounded. For truncated results, see SHRN.

+

A64 Instruction

RSHRN Vd.4H,Vn.4S,#n
+

Argument Preparation

a → Vn.4S 
+1 << n << 16

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+for e = 0 to elements-1
+    element = (UInt(Elem[operand, e, 2*esize]) + round_const) >> shift;
+    Elem[result, e, esize] = element<esize-1:0>;
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Rounding Shift Right Narrow (immediate). This instruction reads each unsigned integer value from the vector in the source SIMD&FP register, right shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements. The results are rounded. For truncated results, see SHRN.

+

A64 Instruction

RSHRN Vd.2S,Vn.2D,#n
+

Argument Preparation

a → Vn.2D 
+1 << n << 32

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+for e = 0 to elements-1
+    element = (UInt(Elem[operand, e, 2*esize]) + round_const) >> shift;
+    Elem[result, e, esize] = element<esize-1:0>;
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Rounding Shift Right Narrow (immediate). This instruction reads each unsigned integer value from the vector in the source SIMD&FP register, right shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements. The results are rounded. For truncated results, see SHRN.

+

A64 Instruction

RSHRN Vd.8B,Vn.8H,#n
+

Argument Preparation

a → Vn.8H 
+1 << n << 8

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+for e = 0 to elements-1
+    element = (UInt(Elem[operand, e, 2*esize]) + round_const) >> shift;
+    Elem[result, e, esize] = element<esize-1:0>;
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Rounding Shift Right Narrow (immediate). This instruction reads each unsigned integer value from the vector in the source SIMD&FP register, right shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements. The results are rounded. For truncated results, see SHRN.

+

A64 Instruction

RSHRN Vd.4H,Vn.4S,#n
+

Argument Preparation

a → Vn.4S 
+1 << n << 16

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+for e = 0 to elements-1
+    element = (UInt(Elem[operand, e, 2*esize]) + round_const) >> shift;
+    Elem[result, e, esize] = element<esize-1:0>;
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Rounding Shift Right Narrow (immediate). This instruction reads each unsigned integer value from the vector in the source SIMD&FP register, right shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements. The results are rounded. For truncated results, see SHRN.

+

A64 Instruction

RSHRN Vd.2S,Vn.2D,#n
+

Argument Preparation

a → Vn.2D 
+1 << n << 32

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+for e = 0 to elements-1
+    element = (UInt(Elem[operand, e, 2*esize]) + round_const) >> shift;
+    Elem[result, e, esize] = element<esize-1:0>;
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Rounding Shift Right Narrow (immediate). This instruction reads each unsigned integer value from the vector in the source SIMD&FP register, right shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements. The results are rounded. For truncated results, see SHRN.

+

A64 Instruction

RSHRN2 Vd.16B,Vn.8H,#n
+

Argument Preparation

r → Vd.8B 
+a → Vn.8H
+1 << n << 8

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+for e = 0 to elements-1
+    element = (UInt(Elem[operand, e, 2*esize]) + round_const) >> shift;
+    Elem[result, e, esize] = element<esize-1:0>;
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Rounding Shift Right Narrow (immediate). This instruction reads each unsigned integer value from the vector in the source SIMD&FP register, right shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements. The results are rounded. For truncated results, see SHRN.

+

A64 Instruction

RSHRN2 Vd.8H,Vn.4S,#n
+

Argument Preparation

r → Vd.4H 
+a → Vn.4S
+1 << n << 16

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+for e = 0 to elements-1
+    element = (UInt(Elem[operand, e, 2*esize]) + round_const) >> shift;
+    Elem[result, e, esize] = element<esize-1:0>;
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Rounding Shift Right Narrow (immediate). This instruction reads each unsigned integer value from the vector in the source SIMD&FP register, right shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements. The results are rounded. For truncated results, see SHRN.

+

A64 Instruction

RSHRN2 Vd.4S,Vn.2D,#n
+

Argument Preparation

r → Vd.2S 
+a → Vn.2D
+1 << n << 32

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+for e = 0 to elements-1
+    element = (UInt(Elem[operand, e, 2*esize]) + round_const) >> shift;
+    Elem[result, e, esize] = element<esize-1:0>;
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Rounding Shift Right Narrow (immediate). This instruction reads each unsigned integer value from the vector in the source SIMD&FP register, right shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements. The results are rounded. For truncated results, see SHRN.

+

A64 Instruction

RSHRN2 Vd.16B,Vn.8H,#n
+

Argument Preparation

r → Vd.8B 
+a → Vn.8H
+1 << n << 8

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+for e = 0 to elements-1
+    element = (UInt(Elem[operand, e, 2*esize]) + round_const) >> shift;
+    Elem[result, e, esize] = element<esize-1:0>;
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Rounding Shift Right Narrow (immediate). This instruction reads each unsigned integer value from the vector in the source SIMD&FP register, right shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements. The results are rounded. For truncated results, see SHRN.

+

A64 Instruction

RSHRN2 Vd.8H,Vn.4S,#n
+

Argument Preparation

r → Vd.4H 
+a → Vn.4S
+1 << n << 16

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+for e = 0 to elements-1
+    element = (UInt(Elem[operand, e, 2*esize]) + round_const) >> shift;
+    Elem[result, e, esize] = element<esize-1:0>;
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Rounding Shift Right Narrow (immediate). This instruction reads each unsigned integer value from the vector in the source SIMD&FP register, right shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements. The results are rounded. For truncated results, see SHRN.

+

A64 Instruction

RSHRN2 Vd.4S,Vn.2D,#n
+

Argument Preparation

r → Vd.2S 
+a → Vn.2D
+1 << n << 32

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+
+for e = 0 to elements-1
+    element = (UInt(Elem[operand, e, 2*esize]) + round_const) >> shift;
+    Elem[result, e, esize] = element<esize-1:0>;
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Signed saturating Rounded Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, saturates each shifted result to a value that is half the original width, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are signed integer values. The destination vector elements are half as long as the source vector elements. The results are rounded. For truncated results, see SQSHRN.

+

A64 Instruction

SQRSHRN Vd.8B,Vn.8H,#n
+

Argument Preparation

a → Vn.8H 
+1 << n << 8

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, 2*esize], unsigned) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64
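
A scalar Rust sketch of the signed rounded-and-saturated per-lane operation for a 16-bit lane (name invented for the example); it is the rounding counterpart of the truncating sqshrn_lane sketch earlier.

fn sqrshrn_lane(x: i16, n: u32) -> i8 {
    // add the rounding constant before the arithmetic shift, then saturate
    let rounded = (i32::from(x) + (1 << (n - 1))) >> n;
    rounded.clamp(i32::from(i8::MIN), i32::from(i8::MAX)) as i8
}

For example, sqrshrn_lane(127, 1) yields 64 rather than the 63 produced by the truncating form.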

Description

Signed saturating Rounded Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, saturates each shifted result to a value that is half the original width, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are signed integer values. The destination vector elements are half as long as the source vector elements. The results are rounded. For truncated results, see SQSHRN.

+

A64 Instruction

SQRSHRN Vd.4H,Vn.4S,#n
+

Argument Preparation

a → Vn.4S 
+1 << n << 16

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, 2*esize], unsigned) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Rounded Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, saturates each shifted result to a value that is half the original width, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are signed integer values. The destination vector elements are half as long as the source vector elements. The results are rounded. For truncated results, see SQSHRN.

+

A64 Instruction

SQRSHRN Vd.2S,Vn.2D,#n
+

Argument Preparation

a → Vn.2D 
+1 << n << 32

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, 2*esize], unsigned) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned saturating Rounded Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are rounded. For truncated results, see UQSHRN.

+

A64 Instruction

UQRSHRN Vd.8B,Vn.8H,#n
+

Argument Preparation

a → Vn.8H 
+1 << n << 8

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, 2*esize], unsigned) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64
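
The unsigned rounded counterpart can be sketched the same way (name invented, lane width chosen for the example):

fn uqrshrn_lane(x: u16, n: u32) -> u8 {
    // rounded logical shift, then saturate to the unsigned 8-bit range
    ((u32::from(x) + (1 << (n - 1))) >> n).min(u32::from(u8::MAX)) as u8
}

For example, uqrshrn_lane(511, 1) saturates to 255, whereas the non-saturating rshrn_lane sketch above wraps the same input to 0.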

Description

Unsigned saturating Rounded Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are rounded. For truncated results, see UQSHRN.

+

A64 Instruction

UQRSHRN Vd.4H,Vn.4S,#n
+

Argument Preparation

a → Vn.4S 
+1 << n << 16

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, 2*esize], unsigned) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned saturating Rounded Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are rounded. For truncated results, see UQSHRN.

+

A64 Instruction

UQRSHRN Vd.2S,Vn.2D,#n
+

Argument Preparation

a → Vn.2D 
+1 << n << 32

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, 2*esize], unsigned) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Rounded Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, saturates each shifted result to a value that is half the original width, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are signed integer values. The destination vector elements are half as long as the source vector elements. The results are rounded. For truncated results, see SQSHRN.

+

A64 Instruction

SQRSHRN Bd,Hn,#n
+

Argument Preparation

a → Hn 
+1 << n << 8

Results

Bd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, 2*esize], unsigned) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Signed saturating Rounded Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, saturates each shifted result to a value that is half the original width, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are signed integer values. The destination vector elements are half as long as the source vector elements. The results are rounded. For truncated results, see SQSHRN.

+

A64 Instruction

SQRSHRN Hd,Sn,#n
+

Argument Preparation

a → Sn 
+1 << n << 16

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, 2*esize], unsigned) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Signed saturating Rounded Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, saturates each shifted result to a value that is half the original width, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are signed integer values. The destination vector elements are half as long as the source vector elements. The results are rounded. For truncated results, see SQSHRN.

+

A64 Instruction

SQRSHRN Sd,Dn,#n
+

Argument Preparation

a → Dn 
+1 << n << 32

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, 2*esize], unsigned) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Unsigned saturating Rounded Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are rounded. For truncated results, see UQSHRN.

+

A64 Instruction

UQRSHRN Bd,Hn,#n
+

Argument Preparation

a → Hn 
+1 << n << 8

Results

Bd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, 2*esize], unsigned) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Unsigned saturating Rounded Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are rounded. For truncated results, see UQSHRN.

+

A64 Instruction

UQRSHRN Hd,Sn,#n
+

Argument Preparation

a → Sn 
+1 << n << 16

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, 2*esize], unsigned) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Unsigned saturating Rounded Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are rounded. For truncated results, see UQSHRN.

+

A64 Instruction

UQRSHRN Sd,Dn,#n
+

Argument Preparation

a → Dn 
+1 << n << 32

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, 2*esize], unsigned) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Signed saturating Rounded Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, saturates each shifted result to a value that is half the original width, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are signed integer values. The destination vector elements are half as long as the source vector elements. The results are rounded. For truncated results, see SQSHRN.

+

A64 Instruction

SQRSHRN2 Vd.16B,Vn.8H,#n
+

Argument Preparation

r → Vd.8B 
+a → Vn.8H
+1 << n << 8

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, 2*esize], unsigned) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Signed saturating Rounded Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, saturates each shifted result to a value that is half the original width, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are signed integer values. The destination vector elements are half as long as the source vector elements. The results are rounded. For truncated results, see SQSHRN.

+

A64 Instruction

SQRSHRN2 Vd.8H,Vn.4S,#n
+

Argument Preparation

r → Vd.4H 
+a → Vn.4S
+1 << n << 16

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, 2*esize], unsigned) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Signed saturating Rounded Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, saturates each shifted result to a value that is half the original width, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are signed integer values. The destination vector elements are half as long as the source vector elements. The results are rounded. For truncated results, see SQSHRN.

+

A64 Instruction

SQRSHRN2 Vd.4S,Vn.2D,#n
+

Argument Preparation

r → Vd.2S 
+a → Vn.2D
+1 << n << 32

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, 2*esize], unsigned) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Unsigned saturating Rounded Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are rounded. For truncated results, see UQSHRN.

+

A64 Instruction

UQRSHRN2 Vd.16B,Vn.8H,#n
+

Argument Preparation

r → Vd.8B 
+a → Vn.8H
+1 << n << 8

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, 2*esize], unsigned) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Unsigned saturating Rounded Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are rounded. For truncated results, see UQSHRN.

+

A64 Instruction

UQRSHRN2 Vd.8H,Vn.4S,#n
+

Argument Preparation

r → Vd.4H 
+a → Vn.4S
+1 << n << 16

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, 2*esize], unsigned) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Unsigned saturating Rounded Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are rounded. For truncated results, see UQSHRN.

+

A64 Instruction

UQRSHRN2 Vd.4S,Vn.2D,#n
+

Argument Preparation

r → Vd.2S 
+a → Vn.2D
+1 << n << 32

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize*2) operand = V[n];
+bits(datasize) result;
+integer round_const = if round then (1 << (shift - 1)) else 0;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = (Int(Elem[operand, e, 2*esize], unsigned) + round_const) >> shift;
+    (Elem[result, e, esize], sat) = SatQ(element, esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Signed Shift Left Long (immediate). This instruction reads each vector element from the source SIMD&FP register, left shifts each vector element by the specified shift amount, places the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are signed integer values.

+

A64 Instruction

SSHLL Vd.8H,Vn.8B,#n
+

Argument Preparation

a → Vn.8B 
+0 << n << 7

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = Vpart[n, part];
+bits(datasize*2) result;
+integer element;
+
+for e = 0 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned) << shift;
+    Elem[result, e, 2*esize] = element<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
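
Because the destination lanes are twice as wide as the source lanes and the shift amount is at most one less than the source element size, the widened shift can never overflow. A scalar Rust sketch for an 8-bit lane (name invented for the example):

fn sshll_lane(x: i8, n: u32) -> i16 {
    // sign-extend first, then shift; with n in 0..=7 the result always fits in 16 bits
    i16::from(x) << n
}

For example, sshll_lane(-3, 4) yields -48.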

Description

Signed Shift Left Long (immediate). This instruction reads each vector element from the source SIMD&FP register, left shifts each vector element by the specified shift amount, places the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are signed integer values.

+

A64 Instruction

SSHLL Vd.4S,Vn.4H,#n
+

Argument Preparation

a → Vn.4H 
+0 << n << 15

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = Vpart[n, part];
+bits(datasize*2) result;
+integer element;
+
+for e = 0 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned) << shift;
+    Elem[result, e, 2*esize] = element<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Shift Left Long (immediate). This instruction reads each vector element from the source SIMD&FP register, left shifts each vector element by the specified shift amount, places the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are signed integer values.

+

A64 Instruction

SSHLL Vd.2D,Vn.2S,#n
+

Argument Preparation

a → Vn.2S 
+0 << n << 31

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = Vpart[n, part];
+bits(datasize*2) result;
+integer element;
+
+for e = 0 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned) << shift;
+    Elem[result, e, 2*esize] = element<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Shift Left Long (immediate). This instruction reads each vector element in the lower or upper half of the source SIMD&FP register, shifts the unsigned integer value left by the specified number of bits, places the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

USHLL Vd.8H,Vn.8B,#n
+

Argument Preparation

a → Vn.8B 
+0 << n << 7

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = Vpart[n, part];
+bits(datasize*2) result;
+integer element;
+
+for e = 0 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned) << shift;
+    Elem[result, e, 2*esize] = element<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
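
The unsigned form only differs in the widening conversion; a matching sketch (name invented):

fn ushll_lane(x: u8, n: u32) -> u16 {
    // zero-extend, then shift; with n in 0..=7 the result fits in 16 bits
    u16::from(x) << n
}

For example, ushll_lane(0xFF, 7) yields 32640, still within the 16-bit range.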

Description

Unsigned Shift Left Long (immediate). This instruction reads each vector element in the lower or upper half of the source SIMD&FP register, shifts the unsigned integer value left by the specified number of bits, places the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

USHLL Vd.4S,Vn.4H,#n
+

Argument Preparation

a → Vn.4H 
+0 << n << 15

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = Vpart[n, part];
+bits(datasize*2) result;
+integer element;
+
+for e = 0 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned) << shift;
+    Elem[result, e, 2*esize] = element<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Shift Left Long (immediate). This instruction reads each vector element in the lower or upper half of the source SIMD&FP register, shifts the unsigned integer value left by the specified number of bits, places the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

USHLL Vd.2D,Vn.2S,#n
+

Argument Preparation

a → Vn.2S 
+0 << n << 31

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = Vpart[n, part];
+bits(datasize*2) result;
+integer element;
+
+for e = 0 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned) << shift;
+    Elem[result, e, 2*esize] = element<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Shift Left Long (immediate). This instruction reads each vector element from the source SIMD&FP register, left shifts each vector element by the specified shift amount, places the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are signed integer values.

+

A64 Instruction

SSHLL2 Vd.8H,Vn.16B,#n
+

Argument Preparation

a → Vn.16B 
+0 << n << 7

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = Vpart[n, part];
+bits(datasize*2) result;
+integer element;
+
+for e = 0 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned) << shift;
+    Elem[result, e, 2*esize] = element<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64
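
The SSHLL2/USHLL2 forms read the upper half of the source register, as Vpart[n, part] in the pseudocode indicates. A Rust sketch of that selection for the 16B source form (function shape chosen for the example):

fn sshll2(src: [i8; 16], n: u32) -> [i16; 8] {
    let mut out = [0i16; 8];
    // only the upper eight source lanes are widened and shifted
    for e in 0..8 {
        out[e] = i16::from(src[8 + e]) << n;
    }
    out
}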

Description

Signed Shift Left Long (immediate). This instruction reads each vector element from the source SIMD&FP register, left shifts each vector element by the specified shift amount, places the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are signed integer values.

+

A64 Instruction

SSHLL2 Vd.4S,Vn.8H,#n
+

Argument Preparation

a → Vn.8H 
+0 << n << 15

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = Vpart[n, part];
+bits(datasize*2) result;
+integer element;
+
+for e = 0 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned) << shift;
+    Elem[result, e, 2*esize] = element<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Shift Left Long (immediate). This instruction reads each vector element from the source SIMD&FP register, left shifts each vector element by the specified shift amount, places the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are signed integer values.

+

A64 Instruction

SSHLL2 Vd.2D,Vn.4S,#n
+

Argument Preparation

a → Vn.4S 
+0 <= n <= 31

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = Vpart[n, part];
+bits(datasize*2) result;
+integer element;
+
+for e = 0 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned) << shift;
+    Elem[result, e, 2*esize] = element<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Shift Left Long (immediate). This instruction reads each vector element in the lower or upper half of the source SIMD&FP register, shifts the unsigned integer value left by the specified number of bits, places the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

USHLL2 Vd.8H,Vn.16B,#n
+

Argument Preparation

a → Vn.16B 
+0 <= n <= 7

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = Vpart[n, part];
+bits(datasize*2) result;
+integer element;
+
+for e = 0 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned) << shift;
+    Elem[result, e, 2*esize] = element<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64
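
The unsigned upper-half form (USHLL2 Vd.8H,Vn.16B,#n) differs from the signed sketch above only in using zero extension. Again a hedged scalar sketch with illustrative names:

fn ushll2_u8x16(a: [u8; 16], n: u32) -> [u16; 8] {
    assert!(n <= 7, "immediate must satisfy 0 <= n <= 7");
    let mut r = [0u16; 8];
    for e in 0..8 {
        r[e] = (a[8 + e] as u16) << n; // upper half, zero-extended, then shifted
    }
    r
}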

Description

Unsigned Shift Left Long (immediate). This instruction reads each vector element in the lower or upper half of the source SIMD&FP register, shifts the unsigned integer value left by the specified number of bits, places the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

USHLL2 Vd.4S,Vn.8H,#n
+

Argument Preparation

a → Vn.8H 
+0 <= n <= 15

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = Vpart[n, part];
+bits(datasize*2) result;
+integer element;
+
+for e = 0 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned) << shift;
+    Elem[result, e, 2*esize] = element<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Shift Left Long (immediate). This instruction reads each vector element in the lower or upper half of the source SIMD&FP register, shifts the unsigned integer value left by the specified number of bits, places the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

USHLL2 Vd.2D,Vn.4S,#n
+

Argument Preparation

a → Vn.4S 
+0 <= n <= 31

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = Vpart[n, part];
+bits(datasize*2) result;
+integer element;
+
+for e = 0 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned) << shift;
+    Elem[result, e, 2*esize] = element<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Shift Left Long (by element size). This instruction reads each vector element in the lower or upper half of the source SIMD&FP register, left shifts each result by the element size, writes the final result to a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

SHLL Vd.8H,Vn.8B,#n
+

Argument Preparation

a → Vn.8B 
+8 <= n <= 8

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = Vpart[n, part];
+bits(2*datasize) result;
+integer element;
+
+for e = 0 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned) << shift;
+    Elem[result, e, 2*esize] = element<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
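
SHLL fixes the shift amount at the element size, so the 8-bit arrangement above always shifts by 8. A minimal scalar sketch under that assumption (name and layout are illustrative, not stdarch code):

fn shll_u8x8(a: [u8; 8]) -> [u16; 8] {
    let mut r = [0u16; 8];
    for e in 0..8 {
        r[e] = (a[e] as u16) << 8; // shift by esize == 8
    }
    r
}

An input lane of 0xFF therefore becomes 0xFF00 in the widened result.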

Description

Shift Left Long (by element size). This instruction reads each vector element in the lower or upper half of the source SIMD&FP register, left shifts each result by the element size, writes the final result to a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

SHLL Vd.4S,Vn.4H,#n
+

Argument Preparation

a → Vn.4H 
+16 <= n <= 16

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = Vpart[n, part];
+bits(2*datasize) result;
+integer element;
+
+for e = 0 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned) << shift;
+    Elem[result, e, 2*esize] = element<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Left Long (by element size). This instruction reads each vector element in the lower or upper half of the source SIMD&FP register, left shifts each result by the element size, writes the final result to a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

SHLL Vd.2D,Vn.2S,#n
+

Argument Preparation

a → Vn.2S 
+32 <= n <= 32

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = Vpart[n, part];
+bits(2*datasize) result;
+integer element;
+
+for e = 0 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned) << shift;
+    Elem[result, e, 2*esize] = element<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Left Long (by element size). This instruction reads each vector element in the lower or upper half of the source SIMD&FP register, left shifts each result by the element size, writes the final result to a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

SHLL Vd.8H,Vn.8B,#n
+

Argument Preparation

a → Vn.8B 
+8 <= n <= 8

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = Vpart[n, part];
+bits(2*datasize) result;
+integer element;
+
+for e = 0 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned) << shift;
+    Elem[result, e, 2*esize] = element<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Left Long (by element size). This instruction reads each vector element in the lower or upper half of the source SIMD&FP register, left shifts each result by the element size, writes the final result to a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

SHLL Vd.4S,Vn.4H,#n
+

Argument Preparation

a → Vn.4H 
+16 <= n <= 16

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = Vpart[n, part];
+bits(2*datasize) result;
+integer element;
+
+for e = 0 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned) << shift;
+    Elem[result, e, 2*esize] = element<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Left Long (by element size). This instruction reads each vector element in the lower or upper half of the source SIMD&FP register, left shifts each result by the element size, writes the final result to a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

SHLL Vd.2D,Vn.2S,#n
+

Argument Preparation

a → Vn.2S 
+32 <= n <= 32

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = Vpart[n, part];
+bits(2*datasize) result;
+integer element;
+
+for e = 0 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned) << shift;
+    Elem[result, e, 2*esize] = element<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Left Long (by element size). This instruction reads each vector element in the lower or upper half of the source SIMD&FP register, left shifts each result by the element size, writes the final result to a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

SHLL2 Vd.8H,Vn.16B,#n
+

Argument Preparation

a → Vn.16B 
+8 <= n <= 8

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = Vpart[n, part];
+bits(2*datasize) result;
+integer element;
+
+for e = 0 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned) << shift;
+    Elem[result, e, 2*esize] = element<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64
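
The SHLL2 form reads the upper half instead. A sketch assuming the 16-lane, 8-bit arrangement above (names illustrative only):

fn shll2_u8x16(a: [u8; 16]) -> [u16; 8] {
    let mut r = [0u16; 8];
    for e in 0..8 {
        r[e] = (a[8 + e] as u16) << 8; // upper half, shifted by esize == 8
    }
    r
}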

Description

Shift Left Long (by element size). This instruction reads each vector element in the lower or upper half of the source SIMD&FP register, left shifts each result by the element size, writes the final result to a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

SHLL2 Vd.4S,Vn.8H,#n
+

Argument Preparation

a → Vn.8H 
+16 <= n <= 16

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = Vpart[n, part];
+bits(2*datasize) result;
+integer element;
+
+for e = 0 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned) << shift;
+    Elem[result, e, 2*esize] = element<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Shift Left Long (by element size). This instruction reads each vector element in the lower or upper half of the source SIMD&FP register, left shifts each result by the element size, writes the final result to a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

SHLL2 Vd.2D,Vn.4S,#n
+

Argument Preparation

a → Vn.4S 
+32 <= n <= 32

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = Vpart[n, part];
+bits(2*datasize) result;
+integer element;
+
+for e = 0 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned) << shift;
+    Elem[result, e, 2*esize] = element<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Shift Left Long (by element size). This instruction reads each vector element in the lower or upper half of the source SIMD&FP register, left shifts each result by the element size, writes the final result to a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

SHLL2 Vd.8H,Vn.16B,#n
+

Argument Preparation

a → Vn.16B 
+8 <= n <= 8

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = Vpart[n, part];
+bits(2*datasize) result;
+integer element;
+
+for e = 0 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned) << shift;
+    Elem[result, e, 2*esize] = element<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Shift Left Long (by element size). This instruction reads each vector element in the lower or upper half of the source SIMD&FP register, left shifts each result by the element size, writes the final result to a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

SHLL2 Vd.4S,Vn.8H,#n
+

Argument Preparation

a → Vn.8H 
+16 <= n <= 16

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = Vpart[n, part];
+bits(2*datasize) result;
+integer element;
+
+for e = 0 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned) << shift;
+    Elem[result, e, 2*esize] = element<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Shift Left Long (by element size). This instruction reads each vector element in the lower or upper half of the source SIMD&FP register, left shifts each result by the element size, writes the final result to a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

SHLL2 Vd.2D,Vn.4S,#n
+

Argument Preparation

a → Vn.4S 
+32 <= n <= 32

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = Vpart[n, part];
+bits(2*datasize) result;
+integer element;
+
+for e = 0 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned) << shift;
+    Elem[result, e, 2*esize] = element<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Shift Right and Insert (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the right of each vector element of the source register are lost.

+

A64 Instruction

SRI Vd.8B,Vn.8B,#n
+

Argument Preparation

a → Vd.8B 
+b → Vn.8B
+1 <= n <= 8

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2 = V[d];
+bits(datasize) result;
+bits(esize) mask = LSR(Ones(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = LSR(Elem[operand, e, esize], shift);
+    Elem[result, e, esize] = (Elem[operand2, e, esize] AND NOT(mask)) OR shifted;
+V[d] = result;
+

Supported architectures

v7/A32/A64
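
The insert behaviour described above (destination bits above the shifted value are preserved) can be sketched per 8-bit lane as follows. This mirrors the mask/NOT/OR steps of the pseudocode; the function name and lane layout are illustrative assumptions, not stdarch code.

fn sri_u8x8(d: [u8; 8], a: [u8; 8], n: u32) -> [u8; 8] {
    assert!((1..=8).contains(&n), "immediate must satisfy 1 <= n <= 8");
    let mask = (0xffu16 >> n) as u8;              // bits the shifted value may occupy
    let mut r = [0u8; 8];
    for e in 0..8 {
        let shifted = ((a[e] as u16) >> n) as u8; // logical shift right by n
        r[e] = (d[e] & !mask) | shifted;          // keep d's top n bits, insert the rest
    }
    r
}

For n == 8 the mask is zero, so every source bit is shifted out and each destination lane is left unchanged, which matches the pseudocode.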

Description

Shift Right and Insert (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the right of each vector element of the source register are lost.

+

A64 Instruction

SRI Vd.16B,Vn.16B,#n
+

Argument Preparation

a → Vd.16B 
+b → Vn.16B
+1 <= n <= 8

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2 = V[d];
+bits(datasize) result;
+bits(esize) mask = LSR(Ones(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = LSR(Elem[operand, e, esize], shift);
+    Elem[result, e, esize] = (Elem[operand2, e, esize] AND NOT(mask)) OR shifted;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Right and Insert (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the right of each vector element of the source register are lost.

+

A64 Instruction

SRI Vd.4H,Vn.4H,#n
+

Argument Preparation

a → Vd.4H 
+b → Vn.4H
+1 <= n <= 16

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2 = V[d];
+bits(datasize) result;
+bits(esize) mask = LSR(Ones(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = LSR(Elem[operand, e, esize], shift);
+    Elem[result, e, esize] = (Elem[operand2, e, esize] AND NOT(mask)) OR shifted;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Right and Insert (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the right of each vector element of the source register are lost.

+

A64 Instruction

SRI Vd.8H,Vn.8H,#n
+

Argument Preparation

a → Vd.8H 
+b → Vn.8H
+1 <= n <= 16

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2 = V[d];
+bits(datasize) result;
+bits(esize) mask = LSR(Ones(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = LSR(Elem[operand, e, esize], shift);
+    Elem[result, e, esize] = (Elem[operand2, e, esize] AND NOT(mask)) OR shifted;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Right and Insert (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the right of each vector element of the source register are lost.

+

A64 Instruction

SRI Vd.2S,Vn.2S,#n
+

Argument Preparation

a → Vd.2S 
+b → Vn.2S
+1 <= n <= 32

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2 = V[d];
+bits(datasize) result;
+bits(esize) mask = LSR(Ones(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = LSR(Elem[operand, e, esize], shift);
+    Elem[result, e, esize] = (Elem[operand2, e, esize] AND NOT(mask)) OR shifted;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Right and Insert (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the right of each vector element of the source register are lost.

+

A64 Instruction

SRI Vd.4S,Vn.4S,#n
+

Argument Preparation

a → Vd.4S 
+b → Vn.4S
+1 <= n <= 32

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2 = V[d];
+bits(datasize) result;
+bits(esize) mask = LSR(Ones(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = LSR(Elem[operand, e, esize], shift);
+    Elem[result, e, esize] = (Elem[operand2, e, esize] AND NOT(mask)) OR shifted;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Right and Insert (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the right of each vector element of the source register are lost.

+

A64 Instruction

SRI Dd,Dn,#n
+

Argument Preparation

a → Dd 
+b → Dn
+1 <= n <= 64

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2 = V[d];
+bits(datasize) result;
+bits(esize) mask = LSR(Ones(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = LSR(Elem[operand, e, esize], shift);
+    Elem[result, e, esize] = (Elem[operand2, e, esize] AND NOT(mask)) OR shifted;
+V[d] = result;
+

Supported architectures

v7/A32/A64
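
The 64-bit scalar form (SRI Dd,Dn,#n) behaves the same way on a single doubleword. A hedged sketch with an illustrative name; n == 64 leaves the destination untouched:

fn sri_u64(d: u64, a: u64, n: u32) -> u64 {
    assert!((1..=64).contains(&n), "immediate must satisfy 1 <= n <= 64");
    if n == 64 {
        return d;                  // everything shifts out; destination preserved
    }
    let mask = u64::MAX >> n;      // bits the shifted value may occupy
    (d & !mask) | (a >> n)
}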

Description

Shift Right and Insert (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the right of each vector element of the source register are lost.

+

A64 Instruction

SRI Vd.2D,Vn.2D,#n
+

Argument Preparation

a → Vd.2D 
+b → Vn.2D
+1 <= n <= 64

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2 = V[d];
+bits(datasize) result;
+bits(esize) mask = LSR(Ones(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = LSR(Elem[operand, e, esize], shift);
+    Elem[result, e, esize] = (Elem[operand2, e, esize] AND NOT(mask)) OR shifted;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Right and Insert (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the right of each vector element of the source register are lost.

+

A64 Instruction

SRI Vd.8B,Vn.8B,#n
+

Argument Preparation

a → Vd.8B 
+b → Vn.8B
+1 <= n <= 8

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2 = V[d];
+bits(datasize) result;
+bits(esize) mask = LSR(Ones(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = LSR(Elem[operand, e, esize], shift);
+    Elem[result, e, esize] = (Elem[operand2, e, esize] AND NOT(mask)) OR shifted;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Right and Insert (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the right of each vector element of the source register are lost.

+

A64 Instruction

SRI Vd.16B,Vn.16B,#n
+

Argument Preparation

a → Vd.16B 
+b → Vn.16B
+1 <= n <= 8

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2 = V[d];
+bits(datasize) result;
+bits(esize) mask = LSR(Ones(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = LSR(Elem[operand, e, esize], shift);
+    Elem[result, e, esize] = (Elem[operand2, e, esize] AND NOT(mask)) OR shifted;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Right and Insert (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the right of each vector element of the source register are lost.

+

A64 Instruction

SRI Vd.4H,Vn.4H,#n
+

Argument Preparation

a → Vd.4H 
+b → Vn.4H
+1 <= n <= 16

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2 = V[d];
+bits(datasize) result;
+bits(esize) mask = LSR(Ones(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = LSR(Elem[operand, e, esize], shift);
+    Elem[result, e, esize] = (Elem[operand2, e, esize] AND NOT(mask)) OR shifted;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Right and Insert (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the right of each vector element of the source register are lost.

+

A64 Instruction

SRI Vd.8H,Vn.8H,#n
+

Argument Preparation

a → Vd.8H 
+b → Vn.8H
+1 <= n <= 16

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2 = V[d];
+bits(datasize) result;
+bits(esize) mask = LSR(Ones(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = LSR(Elem[operand, e, esize], shift);
+    Elem[result, e, esize] = (Elem[operand2, e, esize] AND NOT(mask)) OR shifted;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Right and Insert (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the right of each vector element of the source register are lost.

+

A64 Instruction

SRI Vd.2S,Vn.2S,#n
+

Argument Preparation

a → Vd.2S 
+b → Vn.2S
+1 <= n <= 32

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2 = V[d];
+bits(datasize) result;
+bits(esize) mask = LSR(Ones(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = LSR(Elem[operand, e, esize], shift);
+    Elem[result, e, esize] = (Elem[operand2, e, esize] AND NOT(mask)) OR shifted;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Right and Insert (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the right of each vector element of the source register are lost.

+

A64 Instruction

SRI Vd.4S,Vn.4S,#n
+

Argument Preparation

a → Vd.4S 
+b → Vn.4S
+1 <= n <= 32

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2 = V[d];
+bits(datasize) result;
+bits(esize) mask = LSR(Ones(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = LSR(Elem[operand, e, esize], shift);
+    Elem[result, e, esize] = (Elem[operand2, e, esize] AND NOT(mask)) OR shifted;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Right and Insert (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the right of each vector element of the source register are lost.

+

A64 Instruction

SRI Dd,Dn,#n
+

Argument Preparation

a → Dd 
+b → Dn
+1 <= n <= 64

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2 = V[d];
+bits(datasize) result;
+bits(esize) mask = LSR(Ones(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = LSR(Elem[operand, e, esize], shift);
+    Elem[result, e, esize] = (Elem[operand2, e, esize] AND NOT(mask)) OR shifted;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Right and Insert (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the right of each vector element of the source register are lost.

+

A64 Instruction

SRI Vd.2D,Vn.2D,#n
+

Argument Preparation

a → Vd.2D 
+b → Vn.2D
+1 <= n <= 64

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2 = V[d];
+bits(datasize) result;
+bits(esize) mask = LSR(Ones(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = LSR(Elem[operand, e, esize], shift);
+    Elem[result, e, esize] = (Elem[operand2, e, esize] AND NOT(mask)) OR shifted;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Right and Insert (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the right of each vector element of the source register are lost.

+

A64 Instruction

SRI Dd,Dn,#n
+

Argument Preparation

a → Dd 
+b → Dn
+1 <= n <= 64

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2 = V[d];
+bits(datasize) result;
+bits(esize) mask = LSR(Ones(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = LSR(Elem[operand, e, esize], shift);
+    Elem[result, e, esize] = (Elem[operand2, e, esize] AND NOT(mask)) OR shifted;
+V[d] = result;
+

Supported architectures

A32/A64

Description

Shift Right and Insert (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the right of each vector element of the source register are lost.

+

A64 Instruction

SRI Vd.2D,Vn.2D,#n
+

Argument Preparation

a → Vd.2D 
+b → Vn.2D
+1 <= n <= 64

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2 = V[d];
+bits(datasize) result;
+bits(esize) mask = LSR(Ones(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = LSR(Elem[operand, e, esize], shift);
+    Elem[result, e, esize] = (Elem[operand2, e, esize] AND NOT(mask)) OR shifted;
+V[d] = result;
+

Supported architectures

A32/A64

Description

Shift Right and Insert (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the right of each vector element of the source register are lost.

+

A64 Instruction

SRI Vd.8B,Vn.8B,#n
+

Argument Preparation

a → Vd.8B 
+b → Vn.8B
+1 <= n <= 8

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2 = V[d];
+bits(datasize) result;
+bits(esize) mask = LSR(Ones(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = LSR(Elem[operand, e, esize], shift);
+    Elem[result, e, esize] = (Elem[operand2, e, esize] AND NOT(mask)) OR shifted;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Right and Insert (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the right of each vector element of the source register are lost.

+

A64 Instruction

SRI Vd.16B,Vn.16B,#n
+

Argument Preparation

a → Vd.16B 
+b → Vn.16B
+1 <= n <= 8

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2 = V[d];
+bits(datasize) result;
+bits(esize) mask = LSR(Ones(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = LSR(Elem[operand, e, esize], shift);
+    Elem[result, e, esize] = (Elem[operand2, e, esize] AND NOT(mask)) OR shifted;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Right and Insert (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the right of each vector element of the source register are lost.

+

A64 Instruction

SRI Vd.4H,Vn.4H,#n
+

Argument Preparation

a → Vd.4H 
+b → Vn.4H
+1 <= n <= 16

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2 = V[d];
+bits(datasize) result;
+bits(esize) mask = LSR(Ones(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = LSR(Elem[operand, e, esize], shift);
+    Elem[result, e, esize] = (Elem[operand2, e, esize] AND NOT(mask)) OR shifted;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Right and Insert (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the right of each vector element of the source register are lost.

+

A64 Instruction

SRI Vd.8H,Vn.8H,#n
+

Argument Preparation

a → Vd.8H 
+b → Vn.8H
+1 <= n <= 16

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2 = V[d];
+bits(datasize) result;
+bits(esize) mask = LSR(Ones(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = LSR(Elem[operand, e, esize], shift);
+    Elem[result, e, esize] = (Elem[operand2, e, esize] AND NOT(mask)) OR shifted;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Right and Insert (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the right of each vector element of the source register are lost.

+

A64 Instruction

SRI Dd,Dn,#n
+

Argument Preparation

a → Dd 
+b → Dn
+1 <= n <= 64

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2 = V[d];
+bits(datasize) result;
+bits(esize) mask = LSR(Ones(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = LSR(Elem[operand, e, esize], shift);
+    Elem[result, e, esize] = (Elem[operand2, e, esize] AND NOT(mask)) OR shifted;
+V[d] = result;
+

Supported architectures

A64

Description

Shift Right and Insert (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the right of each vector element of the source register are lost.

+

A64 Instruction

SRI Dd,Dn,#n
+

Argument Preparation

a → Dd 
+b → Dn
+1 <= n <= 64

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2 = V[d];
+bits(datasize) result;
+bits(esize) mask = LSR(Ones(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = LSR(Elem[operand, e, esize], shift);
+    Elem[result, e, esize] = (Elem[operand2, e, esize] AND NOT(mask)) OR shifted;
+V[d] = result;
+

Supported architectures

A64

Description

Shift Left and Insert (immediate). This instruction reads each vector element in the source SIMD&FP register, left shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the left of each vector element in the source register are lost.

+

A64 Instruction

SLI Vd.8B,Vn.8B,#n
+

Argument Preparation

a → Vd.8B 
+b → Vn.8B
+0 <= n <= 7

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2 = V[d];
+bits(datasize) result;
+bits(esize) mask = LSL(Ones(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = LSL(Elem[operand, e, esize], shift);
+    Elem[result, e, esize] = (Elem[operand2, e, esize] AND NOT(mask)) OR shifted;
+V[d] = result;
+

Supported architectures

v7/A32/A64
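
SLI is the left-shifting counterpart: the destination's low n bits survive and the shifted source is inserted above them. A per-lane sketch of the description above, with illustrative names rather than stdarch code:

fn sli_u8x8(d: [u8; 8], a: [u8; 8], n: u32) -> [u8; 8] {
    assert!(n <= 7, "immediate must satisfy 0 <= n <= 7");
    let mask = 0xffu8 << n;                  // bits the shifted value may occupy
    let mut r = [0u8; 8];
    for e in 0..8 {
        r[e] = (d[e] & !mask) | (a[e] << n); // keep d's low n bits, insert the rest
    }
    r
}

Bits shifted out of the top of a lane are simply discarded, matching the note in the description.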

Description

Shift Left and Insert (immediate). This instruction reads each vector element in the source SIMD&FP register, left shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the left of each vector element in the source register are lost.

+

A64 Instruction

SLI Vd.16B,Vn.16B,#n
+

Argument Preparation

a → Vd.16B 
+b → Vn.16B
+0 <= n <= 7

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2 = V[d];
+bits(datasize) result;
+bits(esize) mask = LSL(Ones(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = LSL(Elem[operand, e, esize], shift);
+    Elem[result, e, esize] = (Elem[operand2, e, esize] AND NOT(mask)) OR shifted;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Left and Insert (immediate). This instruction reads each vector element in the source SIMD&FP register, left shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the left of each vector element in the source register are lost.

+

A64 Instruction

SLI Vd.4H,Vn.4H,#n
+

Argument Preparation

a → Vd.4H 
+b → Vn.4H
+0 <= n <= 15

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2 = V[d];
+bits(datasize) result;
+bits(esize) mask = LSL(Ones(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = LSL(Elem[operand, e, esize], shift);
+    Elem[result, e, esize] = (Elem[operand2, e, esize] AND NOT(mask)) OR shifted;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Left and Insert (immediate). This instruction reads each vector element in the source SIMD&FP register, left shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the left of each vector element in the source register are lost.

+

A64 Instruction

SLI Vd.8H,Vn.8H,#n
+

Argument Preparation

a → Vd.8H 
+b → Vn.8H
+0 <= n <= 15

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2 = V[d];
+bits(datasize) result;
+bits(esize) mask = LSL(Ones(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = LSL(Elem[operand, e, esize], shift);
+    Elem[result, e, esize] = (Elem[operand2, e, esize] AND NOT(mask)) OR shifted;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Left and Insert (immediate). This instruction reads each vector element in the source SIMD&FP register, left shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the left of each vector element in the source register are lost.

+

A64 Instruction

SLI Vd.2S,Vn.2S,#n
+

Argument Preparation

a → Vd.2S 
+b → Vn.2S
+0 <= n <= 31

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2 = V[d];
+bits(datasize) result;
+bits(esize) mask = LSL(Ones(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = LSL(Elem[operand, e, esize], shift);
+    Elem[result, e, esize] = (Elem[operand2, e, esize] AND NOT(mask)) OR shifted;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Left and Insert (immediate). This instruction reads each vector element in the source SIMD&FP register, left shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the left of each vector element in the source register are lost.

+

A64 Instruction

SLI Vd.4S,Vn.4S,#n
+

Argument Preparation

a → Vd.4S 
+b → Vn.4S
+0 <= n <= 31

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2 = V[d];
+bits(datasize) result;
+bits(esize) mask = LSL(Ones(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = LSL(Elem[operand, e, esize], shift);
+    Elem[result, e, esize] = (Elem[operand2, e, esize] AND NOT(mask)) OR shifted;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Left and Insert (immediate). This instruction reads each vector element in the source SIMD&FP register, left shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the left of each vector element in the source register are lost.

+

A64 Instruction

SLI Dd,Dn,#n
+

Argument Preparation

a → Dd 
+b → Dn
+0 <= n <= 63

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2 = V[d];
+bits(datasize) result;
+bits(esize) mask = LSL(Ones(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = LSL(Elem[operand, e, esize], shift);
+    Elem[result, e, esize] = (Elem[operand2, e, esize] AND NOT(mask)) OR shifted;
+V[d] = result;
+

Supported architectures

v7/A32/A64
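
And the 64-bit scalar SLI form (SLI Dd,Dn,#n), again as a hedged sketch rather than the stdarch implementation:

fn sli_u64(d: u64, a: u64, n: u32) -> u64 {
    assert!(n <= 63, "immediate must satisfy 0 <= n <= 63");
    let mask = u64::MAX << n;      // bits the shifted value may occupy
    (d & !mask) | (a << n)
}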

Description

Shift Left and Insert (immediate). This instruction reads each vector element in the source SIMD&FP register, left shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the left of each vector element in the source register are lost.

+

A64 Instruction

SLI Vd.2D,Vn.2D,#n
+

Argument Preparation

a → Vd.2D 
+b → Vn.2D
+0 <= n <= 63

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2 = V[d];
+bits(datasize) result;
+bits(esize) mask = LSL(Ones(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = LSL(Elem[operand, e, esize], shift);
+    Elem[result, e, esize] = (Elem[operand2, e, esize] AND NOT(mask)) OR shifted;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Left and Insert (immediate). This instruction reads each vector element in the source SIMD&FP register, left shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the left of each vector element in the source register are lost.

+

A64 Instruction

SLI Vd.8B,Vn.8B,#n
+

Argument Preparation

a → Vd.8B 
+b → Vn.8B
+0 <= n <= 7

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2 = V[d];
+bits(datasize) result;
+bits(esize) mask = LSL(Ones(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = LSL(Elem[operand, e, esize], shift);
+    Elem[result, e, esize] = (Elem[operand2, e, esize] AND NOT(mask)) OR shifted;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Left and Insert (immediate). This instruction reads each vector element in the source SIMD&FP register, left shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the left of each vector element in the source register are lost.

+

A64 Instruction

SLI Vd.16B,Vn.16B,#n
+

Argument Preparation

a → Vd.16B 
+b → Vn.16B
+0 <= n <= 7

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2 = V[d];
+bits(datasize) result;
+bits(esize) mask = LSL(Ones(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = LSL(Elem[operand, e, esize], shift);
+    Elem[result, e, esize] = (Elem[operand2, e, esize] AND NOT(mask)) OR shifted;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Left and Insert (immediate). This instruction reads each vector element in the source SIMD&FP register, left shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the left of each vector element in the source register are lost.

+

A64 Instruction

SLI Vd.4H,Vn.4H,#n
+

Argument Preparation

a → Vd.4H 
+b → Vn.4H
+0 <= n <= 15

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2 = V[d];
+bits(datasize) result;
+bits(esize) mask = LSL(Ones(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = LSL(Elem[operand, e, esize], shift);
+    Elem[result, e, esize] = (Elem[operand2, e, esize] AND NOT(mask)) OR shifted;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Left and Insert (immediate). This instruction reads each vector element in the source SIMD&FP register, left shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the left of each vector element in the source register are lost.

+

A64 Instruction

SLI Vd.8H,Vn.8H,#n
+

Argument Preparation

a → Vd.8H 
+b → Vn.8H
+0 <= n <= 15

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2 = V[d];
+bits(datasize) result;
+bits(esize) mask = LSL(Ones(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = LSL(Elem[operand, e, esize], shift);
+    Elem[result, e, esize] = (Elem[operand2, e, esize] AND NOT(mask)) OR shifted;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Left and Insert (immediate). This instruction reads each vector element in the source SIMD&FP register, left shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the left of each vector element in the source register are lost.

+

A64 Instruction

SLI Vd.2S,Vn.2S,#n
+

Argument Preparation

a → Vd.2S 
+b → Vn.2S
+0 <= n <= 31

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2 = V[d];
+bits(datasize) result;
+bits(esize) mask = LSL(Ones(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = LSL(Elem[operand, e, esize], shift);
+    Elem[result, e, esize] = (Elem[operand2, e, esize] AND NOT(mask)) OR shifted;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Left and Insert (immediate). This instruction reads each vector element in the source SIMD&FP register, left shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the left of each vector element in the source register are lost.

+

A64 Instruction

SLI Vd.4S,Vn.4S,#n
+

Argument Preparation

a → Vd.4S 
+b → Vn.4S
+0 <= n <= 31

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2 = V[d];
+bits(datasize) result;
+bits(esize) mask = LSL(Ones(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = LSL(Elem[operand, e, esize], shift);
+    Elem[result, e, esize] = (Elem[operand2, e, esize] AND NOT(mask)) OR shifted;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Left and Insert (immediate). This instruction reads each vector element in the source SIMD&FP register, left shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the left of each vector element in the source register are lost.

+

A64 Instruction

SLI Dd,Dn,#n
+

Argument Preparation

a → Dd 
+b → Dn
+0 <= n <= 63

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2 = V[d];
+bits(datasize) result;
+bits(esize) mask = LSL(Ones(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = LSL(Elem[operand, e, esize], shift);
+    Elem[result, e, esize] = (Elem[operand2, e, esize] AND NOT(mask)) OR shifted;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Left and Insert (immediate). This instruction reads each vector element in the source SIMD&FP register, left shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the left of each vector element in the source register are lost.

+

A64 Instruction

SLI Vd.2D,Vn.2D,#n
+

Argument Preparation

a → Vd.2D 
+b → Vn.2D
+0 <= n <= 63

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2 = V[d];
+bits(datasize) result;
+bits(esize) mask = LSL(Ones(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = LSL(Elem[operand, e, esize], shift);
+    Elem[result, e, esize] = (Elem[operand2, e, esize] AND NOT(mask)) OR shifted;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Left and Insert (immediate). This instruction reads each vector element in the source SIMD&FP register, left shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the left of each vector element in the source register are lost.

+

A64 Instruction

SLI Dd,Dn,#n
+

Argument Preparation

a → Dd 
+b → Dn
+0 <= n <= 63

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2 = V[d];
+bits(datasize) result;
+bits(esize) mask = LSL(Ones(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = LSL(Elem[operand, e, esize], shift);
+    Elem[result, e, esize] = (Elem[operand2, e, esize] AND NOT(mask)) OR shifted;
+V[d] = result;
+

Supported architectures

A32/A64

Description

Shift Left and Insert (immediate). This instruction reads each vector element in the source SIMD&FP register, left shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the left of each vector element in the source register are lost.

+

A64 Instruction

SLI Vd.2D,Vn.2D,#n
+

Argument Preparation

a → Vd.2D 
+b → Vn.2D
+0 <= n <= 63

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2 = V[d];
+bits(datasize) result;
+bits(esize) mask = LSL(Ones(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = LSL(Elem[operand, e, esize], shift);
+    Elem[result, e, esize] = (Elem[operand2, e, esize] AND NOT(mask)) OR shifted;
+V[d] = result;
+

Supported architectures

A32/A64

Description

Shift Left and Insert (immediate). This instruction reads each vector element in the source SIMD&FP register, left shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the left of each vector element in the source register are lost.

+

A64 Instruction

SLI Vd.8B,Vn.8B,#n
+

Argument Preparation

a → Vd.8B 
+b → Vn.8B
+0 <= n <= 7

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2 = V[d];
+bits(datasize) result;
+bits(esize) mask = LSL(Ones(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = LSL(Elem[operand, e, esize], shift);
+    Elem[result, e, esize] = (Elem[operand2, e, esize] AND NOT(mask)) OR shifted;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Left and Insert (immediate). This instruction reads each vector element in the source SIMD&FP register, left shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the left of each vector element in the source register are lost.

+

A64 Instruction

SLI Vd.16B,Vn.16B,#n
+

Argument Preparation

a → Vd.16B 
+b → Vn.16B
+0 <= n <= 7

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2 = V[d];
+bits(datasize) result;
+bits(esize) mask = LSL(Ones(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = LSL(Elem[operand, e, esize], shift);
+    Elem[result, e, esize] = (Elem[operand2, e, esize] AND NOT(mask)) OR shifted;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Left and Insert (immediate). This instruction reads each vector element in the source SIMD&FP register, left shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the left of each vector element in the source register are lost.

+

A64 Instruction

SLI Vd.4H,Vn.4H,#n
+

Argument Preparation

a → Vd.4H 
+b → Vn.4H
+0 ≤ n ≤ 15

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2 = V[d];
+bits(datasize) result;
+bits(esize) mask = LSL(Ones(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = LSL(Elem[operand, e, esize], shift);
+    Elem[result, e, esize] = (Elem[operand2, e, esize] AND NOT(mask)) OR shifted;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Left and Insert (immediate). This instruction reads each vector element in the source SIMD&FP register, left shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the left of each vector element in the source register are lost.

+

A64 Instruction

SLI Vd.8H,Vn.8H,#n
+

Argument Preparation

a → Vd.8H 
+b → Vn.8H
+0 ≤ n ≤ 15

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2 = V[d];
+bits(datasize) result;
+bits(esize) mask = LSL(Ones(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = LSL(Elem[operand, e, esize], shift);
+    Elem[result, e, esize] = (Elem[operand2, e, esize] AND NOT(mask)) OR shifted;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Shift Left and Insert (immediate). This instruction reads each vector element in the source SIMD&FP register, left shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the left of each vector element in the source register are lost.

+

A64 Instruction

SLI Dd,Dn,#n
+

Argument Preparation

a → Dd 
+b → Dn
+0 ≤ n ≤ 63

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2 = V[d];
+bits(datasize) result;
+bits(esize) mask = LSL(Ones(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = LSL(Elem[operand, e, esize], shift);
+    Elem[result, e, esize] = (Elem[operand2, e, esize] AND NOT(mask)) OR shifted;
+V[d] = result;
+

Supported architectures

A64

Description

Shift Left and Insert (immediate). This instruction reads each vector element in the source SIMD&FP register, left shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the left of each vector element in the source register are lost.

+

A64 Instruction

SLI Dd,Dn,#n
+

Argument Preparation

a → Dd 
+b → Dn
+0 ≤ n ≤ 63

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) operand2 = V[d];
+bits(datasize) result;
+bits(esize) mask = LSL(Ones(esize), shift);
+bits(esize) shifted;
+
+for e = 0 to elements-1
+    shifted = LSL(Elem[operand, e, esize], shift);
+    Elem[result, e, esize] = (Elem[operand2, e, esize] AND NOT(mask)) OR shifted;
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to Signed integer, rounding toward Zero (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round towards Zero rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTZS Vd.2S,Vn.2S
+

Argument Preparation

a → Vn.2S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
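
As a rough scalar analogy (not the intrinsic itself): Rust's float-to-integer `as` casts also truncate toward zero, saturate at the target type's bounds, and map NaN to 0, which matches the per-element behaviour described for FCVTZS/FCVTZU with zero fractional bits:

fn main() {
    assert_eq!(2.9_f32 as i32, 2);      // truncate toward zero (FCVTZS)
    assert_eq!((-2.9_f32) as i32, -2);
    assert_eq!((-1.0_f32) as u32, 0);   // unsigned form saturates at 0 (FCVTZU)
    assert_eq!(f32::NAN as i32, 0);     // NaN converts to 0
}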

Description

Floating-point Convert to Signed integer, rounding toward Zero (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round towards Zero rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTZS Vd.4S,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Floating-point Convert to Unsigned integer, rounding toward Zero (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round towards Zero rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTZU Vd.2S,Vn.2S
+

Argument Preparation

a → Vn.2S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Floating-point Convert to Unsigned integer, rounding toward Zero (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round towards Zero rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTZU Vd.4S,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Floating-point Convert to Signed integer, rounding to nearest with ties to even (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round to Nearest rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTNS Vd.2S,Vn.2S
+

Argument Preparation

a → Vn.2S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A32/A64
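
A scalar sketch of the ties-to-even rounding step, assuming a toolchain where `f32::round_ties_even` is available (stable since Rust 1.77); this is only an analogy for the per-element conversion, not the intrinsic:

fn main() {
    // Halfway cases go to the even neighbour before the (now exact) cast.
    assert_eq!(2.5_f32.round_ties_even() as i32, 2);
    assert_eq!(3.5_f32.round_ties_even() as i32, 4);
    assert_eq!((-2.5_f32).round_ties_even() as i32, -2);
}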

Description

Floating-point Convert to Signed integer, rounding to nearest with ties to even (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round to Nearest rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTNS Vd.4S,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A32/A64

Description

Floating-point Convert to Unsigned integer, rounding to nearest with ties to even (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round to Nearest rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTNU Vd.2S,Vn.2S
+

Argument Preparation

a → Vn.2S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A32/A64

Description

Floating-point Convert to Unsigned integer, rounding to nearest with ties to even (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round to Nearest rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTNU Vd.4S,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A32/A64

Description

Floating-point Convert to Signed integer, rounding toward Minus infinity (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round towards Minus Infinity rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTMS Vd.2S,Vn.2S
+

Argument Preparation

a → Vn.2S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A32/A64
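
As a scalar analogy for the Round towards Minus Infinity mode, `f32::floor` followed by a cast gives the same per-element result:

fn main() {
    // Round towards minus infinity, then cast (exact once the value is integral).
    assert_eq!(2.7_f32.floor() as i32, 2);
    assert_eq!((-2.3_f32).floor() as i32, -3);
}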

Description

Floating-point Convert to Signed integer, rounding toward Minus infinity (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round towards Minus Infinity rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTMS Vd.4S,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A32/A64

Description

Floating-point Convert to Unsigned integer, rounding toward Minus infinity (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round towards Minus Infinity rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTMU Vd.2S,Vn.2S
+

Argument Preparation

a → Vn.2S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A32/A64

Description

Floating-point Convert to Unsigned integer, rounding toward Minus infinity (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round towards Minus Infinity rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTMU Vd.4S,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A32/A64

Description

Floating-point Convert to Signed integer, rounding toward Plus infinity (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round towards Plus Infinity rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTPS Vd.2S,Vn.2S
+

Argument Preparation

a → Vn.2S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A32/A64
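
As a scalar analogy for the Round towards Plus Infinity mode, `f32::ceil` followed by a cast gives the same per-element result:

fn main() {
    // Round towards plus infinity, then cast.
    assert_eq!(2.3_f32.ceil() as i32, 3);
    assert_eq!((-2.3_f32).ceil() as i32, -2);
}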

Description

Floating-point Convert to Signed integer, rounding toward Plus infinity (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round towards Plus Infinity rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTPS Vd.4S,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A32/A64

Description

Floating-point Convert to Unsigned integer, rounding toward Plus infinity (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round towards Plus Infinity rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTPU Vd.2S,Vn.2S
+

Argument Preparation

a → Vn.2S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A32/A64

Description

Floating-point Convert to Unsigned integer, rounding toward Plus infinity (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round towards Plus Infinity rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTPU Vd.4S,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A32/A64

Description

Floating-point Convert to Signed integer, rounding to nearest with ties to Away (vector). This instruction converts each element in a vector from a floating-point value to a signed integer value using the Round to Nearest with Ties to Away rounding mode and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTAS Vd.2S,Vn.2S
+

Argument Preparation

a → Vn.2S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A32/A64
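
As a scalar analogy for the ties-to-Away mode, `f32::round` (which rounds halfway cases away from zero) followed by a cast matches the per-element result:

fn main() {
    assert_eq!(2.5_f32.round() as i32, 3);
    assert_eq!((-2.5_f32).round() as i32, -3);
}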

Description

Floating-point Convert to Signed integer, rounding to nearest with ties to Away (vector). This instruction converts each element in a vector from a floating-point value to a signed integer value using the Round to Nearest with Ties to Away rounding mode and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTAS Vd.4S,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A32/A64

Description

Floating-point Convert to Unsigned integer, rounding to nearest with ties to Away (vector). This instruction converts each element in a vector from a floating-point value to an unsigned integer value using the Round to Nearest with Ties to Away rounding mode and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTAU Vd.2S,Vn.2S
+

Argument Preparation

a → Vn.2S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A32/A64

Description

Floating-point Convert to Unsigned integer, rounding to nearest with ties to Away (vector). This instruction converts each element in a vector from a floating-point value to an unsigned integer value using the Round to Nearest with Ties to Away rounding mode and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTAU Vd.4S,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A32/A64

Description

Floating-point Convert to Signed integer, rounding toward Zero (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round towards Zero rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTZS Sd,Sn
+

Argument Preparation

a → Sn 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to Unsigned integer, rounding toward Zero (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round towards Zero rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTZU Sd,Sn
+

Argument Preparation

a → Sn 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to Signed integer, rounding to nearest with ties to even (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round to Nearest rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTNS Sd,Sn
+

Argument Preparation

a → Sn 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to Unsigned integer, rounding to nearest with ties to even (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round to Nearest rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTNU Sd,Sn
+

Argument Preparation

a → Sn 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to Signed integer, rounding toward Minus infinity (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round towards Minus Infinity rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTMS Sd,Sn
+

Argument Preparation

a → Sn 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to Unsigned integer, rounding toward Minus infinity (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round towards Minus Infinity rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTMU Sd,Sn
+

Argument Preparation

a → Sn 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to Signed integer, rounding toward Plus infinity (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round towards Plus Infinity rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTPS Sd,Sn
+

Argument Preparation

a → Sn 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to Unsigned integer, rounding toward Plus infinity (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round towards Plus Infinity rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTPU Sd,Sn
+

Argument Preparation

a → Sn 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to Signed integer, rounding to nearest with ties to Away (vector). This instruction converts each element in a vector from a floating-point value to a signed integer value using the Round to Nearest with Ties to Away rounding mode and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTAS Sd,Sn
+

Argument Preparation

a → Sn 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to Unsigned integer, rounding to nearest with ties to Away (vector). This instruction converts each element in a vector from a floating-point value to an unsigned integer value using the Round to Nearest with Ties to Away rounding mode and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTAU Sd,Sn
+

Argument Preparation

a → Sn 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to Signed integer, rounding toward Zero (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round towards Zero rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTZS Dd,Dn
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to Signed integer, rounding toward Zero (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round towards Zero rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTZS Vd.2D,Vn.2D
+

Argument Preparation

a → Vn.2D 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to Unsigned integer, rounding toward Zero (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round towards Zero rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTZU Dd,Dn
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to Unsigned integer, rounding toward Zero (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round towards Zero rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTZU Vd.2D,Vn.2D
+

Argument Preparation

a → Vn.2D 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to Signed integer, rounding to nearest with ties to even (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round to Nearest rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTNS Dd,Dn
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to Signed integer, rounding to nearest with ties to even (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round to Nearest rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTNS Vd.2D,Vn.2D
+

Argument Preparation

a → Vn.2D 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to Unsigned integer, rounding to nearest with ties to even (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round to Nearest rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTNU Dd,Dn
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to Unsigned integer, rounding to nearest with ties to even (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round to Nearest rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTNU Vd.2D,Vn.2D
+

Argument Preparation

a → Vn.2D 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to Signed integer, rounding toward Minus infinity (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round towards Minus Infinity rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTMS Dd,Dn
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to Signed integer, rounding toward Minus infinity (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round towards Minus Infinity rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTMS Vd.2D,Vn.2D
+

Argument Preparation

a → Vn.2D 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to Unsigned integer, rounding toward Minus infinity (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round towards Minus Infinity rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTMU Dd,Dn
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to Unsigned integer, rounding toward Minus infinity (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round towards Minus Infinity rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTMU Vd.2D,Vn.2D
+

Argument Preparation

a → Vn.2D 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to Signed integer, rounding toward Plus infinity (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round towards Plus Infinity rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTPS Dd,Dn
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to Signed integer, rounding toward Plus infinity (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round towards Plus Infinity rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTPS Vd.2D,Vn.2D
+

Argument Preparation

a → Vn.2D 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to Unsigned integer, rounding toward Plus infinity (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round towards Plus Infinity rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTPU Dd,Dn
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to Unsigned integer, rounding toward Plus infinity (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round towards Plus Infinity rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTPU Vd.2D,Vn.2D
+

Argument Preparation

a → Vn.2D 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to Signed integer, rounding to nearest with ties to Away (vector). This instruction converts each element in a vector from a floating-point value to a signed integer value using the Round to Nearest with Ties to Away rounding mode and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTAS Dd,Dn
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to Signed integer, rounding to nearest with ties to Away (vector). This instruction converts each element in a vector from a floating-point value to a signed integer value using the Round to Nearest with Ties to Away rounding mode and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTAS Vd.2D,Vn.2D
+

Argument Preparation

a → Vn.2D 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to Unsigned integer, rounding to nearest with ties to Away (vector). This instruction converts each element in a vector from a floating-point value to an unsigned integer value using the Round to Nearest with Ties to Away rounding mode and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTAU Dd,Dn
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to Unsigned integer, rounding to nearest with ties to Away (vector). This instruction converts each element in a vector from a floating-point value to an unsigned integer value using the Round to Nearest with Ties to Away rounding mode and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTAU Vd.2D,Vn.2D
+

Argument Preparation

a → Vn.2D 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to Signed integer, rounding toward Zero (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round towards Zero rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTZS Dd,Dn
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to Unsigned integer, rounding toward Zero (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round towards Zero rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTZU Dd,Dn
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to Signed integer, rounding to nearest with ties to even (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round to Nearest rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTNS Dd,Dn
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to Unsigned integer, rounding to nearest with ties to even (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round to Nearest rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTNU Dd,Dn
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to Signed integer, rounding toward Minus infinity (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round towards Minus Infinity rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTMS Dd,Dn
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to Unsigned integer, rounding toward Minus infinity (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round towards Minus Infinity rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTMU Dd,Dn
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to Signed integer, rounding toward Plus infinity (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round towards Plus Infinity rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTPS Dd,Dn
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to Unsigned integer, rounding toward Plus infinity (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round towards Plus Infinity rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTPU Dd,Dn
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to Signed integer, rounding to nearest with ties to Away (vector). This instruction converts each element in a vector from a floating-point value to a signed integer value using the Round to Nearest with Ties to Away rounding mode and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTAS Dd,Dn
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to Unsigned integer, rounding to nearest with ties to Away (vector). This instruction converts each element in a vector from a floating-point value to an unsigned integer value using the Round to Nearest with Ties to Away rounding mode and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTAU Dd,Dn
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to Signed integer, rounding toward Zero (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round towards Zero rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTZS Vd.2S,Vn.2S,#n
+

Argument Preparation

a → Vn.2S 
+1 ≤ n ≤ 32

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
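
The immediate #n selects a fixed-point result with n fractional bits. A scalar sketch under that assumption (the helper name is hypothetical): scale by 2^n, then let Rust's `as` cast truncate toward zero and saturate, as FPToFixed does in the Round-towards-Zero mode:

fn fcvtzs_n(x: f32, n: u32) -> i32 {
    // Illustrative only: scale into a Q-format value, then truncate toward
    // zero with saturation via the `as` cast.
    assert!((1..=32).contains(&n));
    (x * (1u64 << n) as f32) as i32
}

fn main() {
    assert_eq!(fcvtzs_n(1.5, 8), 384);    // 1.5 * 2^8
    assert_eq!(fcvtzs_n(-0.25, 4), -4);   // -0.25 * 2^4
}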

Description

Floating-point Convert to Signed integer, rounding toward Zero (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round towards Zero rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTZS Vd.4S,Vn.4S,#n
+

Argument Preparation

a → Vn.4S 
+1 ≤ n ≤ 32

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Floating-point Convert to Unsigned integer, rounding toward Zero (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round towards Zero rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTZU Vd.2S,Vn.2S,#n
+

Argument Preparation

a → Vn.2S 
+1 ≤ n ≤ 32

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Floating-point Convert to Unsigned integer, rounding toward Zero (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round towards Zero rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTZU Vd.4S,Vn.4S,#n
+

Argument Preparation

a → Vn.4S 
+1 ≤ n ≤ 32

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Floating-point Convert to Signed integer, rounding toward Zero (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round towards Zero rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTZS Sd,Sn,#n
+

Argument Preparation

a → Sn 
+1 ≤ n ≤ 32

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to Unsigned integer, rounding toward Zero (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round towards Zero rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTZU Sd,Sn,#n
+

Argument Preparation

a → Sn 
+1 ≤ n ≤ 32

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to Signed integer, rounding toward Zero (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round towards Zero rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTZS Dd,Dn,#n
+

Argument Preparation

a → Dn 
+1 ≤ n ≤ 64

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to Signed integer, rounding toward Zero (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round towards Zero rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTZS Vd.2D,Vn.2D,#n
+

Argument Preparation

a → Vn.2D 
+1 ≤ n ≤ 64

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to Unsigned integer, rounding toward Zero (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round towards Zero rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTZU Dd,Dn,#n
+

Argument Preparation

a → Dn 
+1 ≤ n ≤ 64

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to Unsigned integer, rounding toward Zero (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round towards Zero rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTZU Vd.2D,Vn.2D,#n
+

Argument Preparation

a → Vn.2D 
+1 ≤ n ≤ 64

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to Signed integer, rounding toward Zero (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round towards Zero rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTZS Dd,Dn,#n
+

Argument Preparation

a → Dn 
+1 ≤ n ≤ 64

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to Unsigned integer, rounding toward Zero (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round towards Zero rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FCVTZU Dd,Dn,#n
+

Argument Preparation

a → Dn 
+1 ≤ n ≤ 64

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPToFixed(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed integer Convert to Floating-point (vector). This instruction converts each element in a vector from signed integer to floating-point using the rounding mode that is specified by the FPCR, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

SCVTF Vd.2S,Vn.2S
+

Argument Preparation

a → Vn.2S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+FPRounding rounding = FPRoundingMode(FPCR);
+bits(esize) element;
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FixedToFP(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
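
If the corresponding NEON bindings are available in core::arch::aarch64 (an assumption about the intrinsics crate, not something this entry guarantees), the 2S form can be exercised as a quick sanity check:

#[cfg(target_arch = "aarch64")]
unsafe fn demo_scvtf() {
    use core::arch::aarch64::*;
    // SCVTF Vd.2S,Vn.2S: each signed 32-bit lane becomes an f32, rounded
    // according to the FPCR (round-to-nearest-even by default).
    let ints: int32x2_t = vdup_n_s32(-7);
    let floats: float32x2_t = vcvt_f32_s32(ints);
    assert_eq!(vget_lane_f32::<0>(floats), -7.0); // -7 is exactly representable
}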

Description

Signed integer Convert to Floating-point (vector). This instruction converts each element in a vector from signed integer to floating-point using the rounding mode that is specified by the FPCR, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

SCVTF Vd.4S,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+FPRounding rounding = FPRoundingMode(FPCR);
+bits(esize) element;
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FixedToFP(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned integer Convert to Floating-point (vector). This instruction converts each element in a vector from an unsigned integer value to a floating-point value using the rounding mode that is specified by the FPCR, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

UCVTF Vd.2S,Vn.2S
+

Argument Preparation

a → Vn.2S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+FPRounding rounding = FPRoundingMode(FPCR);
+bits(esize) element;
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FixedToFP(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned integer Convert to Floating-point (vector). This instruction converts each element in a vector from an unsigned integer value to a floating-point value using the rounding mode that is specified by the FPCR, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

UCVTF Vd.4S,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+FPRounding rounding = FPRoundingMode(FPCR);
+bits(esize) element;
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FixedToFP(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed integer Convert to Floating-point (vector). This instruction converts each element in a vector from signed integer to floating-point using the rounding mode that is specified by the FPCR, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

SCVTF Sd,Sn
+

Argument Preparation

a → Sn 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+FPRounding rounding = FPRoundingMode(FPCR);
+bits(esize) element;
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FixedToFP(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned integer Convert to Floating-point (vector). This instruction converts each element in a vector from an unsigned integer value to a floating-point value using the rounding mode that is specified by the FPCR, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

UCVTF Sd,Sn
+

Argument Preparation

a → Sn 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+FPRounding rounding = FPRoundingMode(FPCR);
+bits(esize) element;
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FixedToFP(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed integer Convert to Floating-point (vector). This instruction converts each element in a vector from signed integer to floating-point using the rounding mode that is specified by the FPCR, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

SCVTF Dd,Dn
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+FPRounding rounding = FPRoundingMode(FPCR);
+bits(esize) element;
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FixedToFP(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed integer Convert to Floating-point (vector). This instruction converts each element in a vector from signed integer to floating-point using the rounding mode that is specified by the FPCR, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

SCVTF Vd.2D,Vn.2D
+

Argument Preparation

a → Vn.2D 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+FPRounding rounding = FPRoundingMode(FPCR);
+bits(esize) element;
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FixedToFP(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned integer Convert to Floating-point (vector). This instruction converts each element in a vector from an unsigned integer value to a floating-point value using the rounding mode that is specified by the FPCR, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

UCVTF Dd,Dn
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+FPRounding rounding = FPRoundingMode(FPCR);
+bits(esize) element;
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FixedToFP(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned integer Convert to Floating-point (vector). This instruction converts each element in a vector from an unsigned integer value to a floating-point value using the rounding mode that is specified by the FPCR, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

UCVTF Vd.2D,Vn.2D
+

Argument Preparation

a → Vn.2D 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+FPRounding rounding = FPRoundingMode(FPCR);
+bits(esize) element;
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FixedToFP(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed integer Convert to Floating-point (vector). This instruction converts each element in a vector from signed integer to floating-point using the rounding mode that is specified by the FPCR, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

SCVTF Dd,Dn
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+FPRounding rounding = FPRoundingMode(FPCR);
+bits(esize) element;
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FixedToFP(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned integer Convert to Floating-point (vector). This instruction converts each element in a vector from an unsigned integer value to a floating-point value using the rounding mode that is specified by the FPCR, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

UCVTF Dd,Dn
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+FPRounding rounding = FPRoundingMode(FPCR);
+bits(esize) element;
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FixedToFP(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed integer Convert to Floating-point (vector). This instruction converts each element in a vector from signed integer to floating-point using the rounding mode that is specified by the FPCR, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

SCVTF Vd.2S,Vn.2S,#n
+

Argument Preparation

a → Vn.2S 
+1 ≤ n ≤ 32

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+FPRounding rounding = FPRoundingMode(FPCR);
+bits(esize) element;
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FixedToFP(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
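
As with the float-to-fixed direction, the #n forms here treat n as a number of fractional bits: the integer lane is converted and then divided by 2^n. A minimal scalar sketch (illustrative only; the pseudocode above shows the generic form with fracbits 0):

// Scalar model of SCVTF Sd,Sn,#n for a single lane (illustrative only).
fn scvtf_fixed_scalar(x: i32, n: u32) -> f32 {
    (x as f32) / (n as f32).exp2() // e.g. scvtf_fixed_scalar(3, 1) == 1.5
}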

Description

Signed integer Convert to Floating-point (vector). This instruction converts each element in a vector from signed integer to floating-point using the rounding mode that is specified by the FPCR, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

SCVTF Vd.4S,Vn.4S,#n
+

Argument Preparation

a → Vn.4S 
+1 ≤ n ≤ 32

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+FPRounding rounding = FPRoundingMode(FPCR);
+bits(esize) element;
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FixedToFP(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned integer Convert to Floating-point (vector). This instruction converts each element in a vector from an unsigned integer value to a floating-point value using the rounding mode that is specified by the FPCR, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

UCVTF Vd.2S,Vn.2S,#n
+

Argument Preparation

a → Vn.2S 
+1 ≤ n ≤ 32

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+FPRounding rounding = FPRoundingMode(FPCR);
+bits(esize) element;
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FixedToFP(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned integer Convert to Floating-point (vector). This instruction converts each element in a vector from an unsigned integer value to a floating-point value using the rounding mode that is specified by the FPCR, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

UCVTF Vd.4S,Vn.4S,#n
+

Argument Preparation

a → Vn.4S 
+1 ≤ n ≤ 32

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+FPRounding rounding = FPRoundingMode(FPCR);
+bits(esize) element;
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FixedToFP(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed integer Convert to Floating-point (vector). This instruction converts each element in a vector from signed integer to floating-point using the rounding mode that is specified by the FPCR, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

SCVTF Sd,Sn,#n
+

Argument Preparation

a → Sn 
+1 ≤ n ≤ 32

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+FPRounding rounding = FPRoundingMode(FPCR);
+bits(esize) element;
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FixedToFP(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned integer Convert to Floating-point (vector). This instruction converts each element in a vector from an unsigned integer value to a floating-point value using the rounding mode that is specified by the FPCR, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

UCVTF Sd,Sn,#n
+

Argument Preparation

a → Sn 
+1 ≤ n ≤ 32

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+FPRounding rounding = FPRoundingMode(FPCR);
+bits(esize) element;
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FixedToFP(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed integer Convert to Floating-point (vector). This instruction converts each element in a vector from signed integer to floating-point using the rounding mode that is specified by the FPCR, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

SCVTF Dd,Dn,#n
+

Argument Preparation

a → Dn 
+1 ≤ n ≤ 64

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+FPRounding rounding = FPRoundingMode(FPCR);
+bits(esize) element;
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FixedToFP(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed integer Convert to Floating-point (vector). This instruction converts each element in a vector from signed integer to floating-point using the rounding mode that is specified by the FPCR, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

SCVTF Vd.2D,Vn.2D,#n
+

Argument Preparation

a → Vn.2D 
+1 ≤ n ≤ 64

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+FPRounding rounding = FPRoundingMode(FPCR);
+bits(esize) element;
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FixedToFP(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned integer Convert to Floating-point (vector). This instruction converts each element in a vector from an unsigned integer value to a floating-point value using the rounding mode that is specified by the FPCR, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

UCVTF Dd,Dn,#n
+

Argument Preparation

a → Dn 
+1 ≤ n ≤ 64

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+FPRounding rounding = FPRoundingMode(FPCR);
+bits(esize) element;
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FixedToFP(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned integer Convert to Floating-point (vector). This instruction converts each element in a vector from an unsigned integer value to a floating-point value using the rounding mode that is specified by the FPCR, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

UCVTF Vd.2D,Vn.2D,#n
+

Argument Preparation

a → Vn.2D 
+1 ≤ n ≤ 64

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+FPRounding rounding = FPRoundingMode(FPCR);
+bits(esize) element;
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FixedToFP(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed integer Convert to Floating-point (vector). This instruction converts each element in a vector from signed integer to floating-point using the rounding mode that is specified by the FPCR, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

SCVTF Dd,Dn,#n
+

Argument Preparation

a → Dn 
+1 ≤ n ≤ 64

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+FPRounding rounding = FPRoundingMode(FPCR);
+bits(esize) element;
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FixedToFP(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned integer Convert to Floating-point (vector). This instruction converts each element in a vector from an unsigned integer value to a floating-point value using the rounding mode that is specified by the FPCR, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

UCVTF Dd,Dn,#n
+

Argument Preparation

a → Dn 
+1 ≤ n ≤ 64

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+FPRounding rounding = FPRoundingMode(FPCR);
+bits(esize) element;
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FixedToFP(element, 0, unsigned, FPCR, rounding);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to lower precision Narrow (vector). This instruction reads each vector element in the SIMD&FP source register, converts each result to half the precision of the source element, writes the final result to a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements. The rounding mode is determined by the FPCR.

+

A64 Instruction

FCVTN Vd.4H,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand = V[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = FPConvert(Elem[operand, e, 2*esize], FPCR);
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Floating-point Convert to lower precision Narrow (vector). This instruction reads each vector element in the SIMD&FP source register, converts each result to half the precision of the source element, writes the final result to a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements. The rounding mode is determined by the FPCR.

+

A64 Instruction

FCVTN2 Vd.8H,Vn.4S
+

Argument Preparation

r → Vd.4H 
+a → Vn.4S

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand = V[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = FPConvert(Elem[operand, e, 2*esize], FPCR);
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Floating-point Convert to lower precision Narrow (vector). This instruction reads each vector element in the SIMD&FP source register, converts each result to half the precision of the source element, writes the final result to a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements. The rounding mode is determined by the FPCR.

+

A64 Instruction

FCVTN Vd.2S,Vn.2D
+

Argument Preparation

a → Vn.2D 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand = V[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = FPConvert(Elem[operand, e, 2*esize], FPCR);
+
+Vpart[d, part] = result;
+

Supported architectures

A64
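
Assuming the vcvt_f32_f64 binding is exposed by core::arch::aarch64 (an assumption, not part of this entry), the f64-to-f32 narrowing looks like this:

#[cfg(target_arch = "aarch64")]
unsafe fn demo_fcvtn() {
    use core::arch::aarch64::*;
    // FCVTN Vd.2S,Vn.2D: each f64 lane is narrowed to f32 using the FPCR
    // rounding mode and written to the lower half of the destination.
    let wide: float64x2_t = vdupq_n_f64(1.5);
    let narrow: float32x2_t = vcvt_f32_f64(wide);
    assert_eq!(vget_lane_f32::<0>(narrow), 1.5); // 1.5 narrows exactly
}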

Description

Floating-point Convert to lower precision Narrow (vector). This instruction reads each vector element in the SIMD&FP source register, converts each result to half the precision of the source element, writes the final result to a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements. The rounding mode is determined by the FPCR.

+

A64 Instruction

FCVTN2 Vd.4S,Vn.2D
+

Argument Preparation

r → Vd.2S 
+a → Vn.2D

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand = V[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = FPConvert(Elem[operand, e, 2*esize], FPCR);
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Floating-point Convert to higher precision Long (vector). This instruction reads each element in a vector in the SIMD&FP source register, converts each value to double the precision of the source element using the rounding mode that is determined by the FPCR, and writes each result to the equivalent element of the vector in the SIMD&FP destination register.

+

A64 Instruction

FCVTL Vd.4S,Vn.4H
+

Argument Preparation

a → Vn.4H 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = Vpart[n, part];
+bits(2*datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, 2*esize] = FPConvert(Elem[operand, e, esize], FPCR);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Floating-point Convert to higher precision Long (vector). This instruction reads each element in a vector in the SIMD&FP source register, converts each value to double the precision of the source element using the rounding mode that is determined by the FPCR, and writes each result to the equivalent element of the vector in the SIMD&FP destination register.

+

A64 Instruction

FCVTL2 Vd.4S,Vn.8H
+

Argument Preparation

a → Vn.8H 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = Vpart[n, part];
+bits(2*datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, 2*esize] = FPConvert(Elem[operand, e, esize], FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to higher precision Long (vector). This instruction reads each element in a vector in the SIMD&FP source register, converts each value to double the precision of the source element using the rounding mode that is determined by the FPCR, and writes each result to the equivalent element of the vector in the SIMD&FP destination register.

+

A64 Instruction

FCVTL Vd.2D,Vn.2S
+

Argument Preparation

a → Vn.2S 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = Vpart[n, part];
+bits(2*datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, 2*esize] = FPConvert(Elem[operand, e, esize], FPCR);
+
+V[d] = result;
+

Supported architectures

A64
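
A hedged sketch of the widening direction, assuming the vcvt_f64_f32 binding exists in core::arch::aarch64:

#[cfg(target_arch = "aarch64")]
unsafe fn demo_fcvtl() {
    use core::arch::aarch64::*;
    // FCVTL Vd.2D,Vn.2S: each f32 lane is widened to f64. Widening is always
    // exact, so the FPCR rounding mode cannot affect the result.
    let narrow: float32x2_t = vdup_n_f32(0.5);
    let wide: float64x2_t = vcvt_f64_f32(narrow);
    assert_eq!(vgetq_lane_f64::<0>(wide), 0.5);
}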

Description

Floating-point Convert to higher precision Long (vector). This instruction reads each element in a vector in the SIMD&FP source register, converts each value to double the precision of the source element using the rounding mode that is determined by the FPCR, and writes each result to the equivalent element of the vector in the SIMD&FP destination register.

+

A64 Instruction

FCVTL2 Vd.2D,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = Vpart[n, part];
+bits(2*datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, 2*esize] = FPConvert(Elem[operand, e, esize], FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Convert to lower precision Narrow, rounding to odd (vector). This instruction reads each vector element in the source SIMD&FP register, narrows each value to half the precision of the source element using the Round to Odd rounding mode, writes the result to a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FCVTXN Vd.2S,Vn.2D
+

Argument Preparation

a → Vn.2D 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand = V[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = FPConvert(Elem[operand, e, 2*esize], FPCR, FPRounding_ODD);
+
+Vpart[d, part] = result;
+

Supported architectures

A64
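
Round-to-odd matters when a value will be narrowed twice (for example f64 to f32 to f16): the forced-odd significand bit records that the first step was inexact and prevents the second rounding from going wrong. A hedged sketch, assuming a vcvtx_f32_f64 binding is available in core::arch::aarch64:

#[cfg(target_arch = "aarch64")]
unsafe fn demo_fcvtxn() {
    use core::arch::aarch64::*;
    // FCVTXN Vd.2S,Vn.2D: 1.0 + 2^-52 is not representable as f32, so the
    // round-to-odd result is the neighbouring f32 with an odd significand.
    let wide: float64x2_t = vdupq_n_f64(1.0 + f64::EPSILON);
    let narrow: float32x2_t = vcvtx_f32_f64(wide);
    assert_eq!(vget_lane_f32::<0>(narrow).to_bits() & 1, 1); // low bit forced odd
}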

Description

Floating-point Convert to lower precision Narrow, rounding to odd (vector). This instruction reads each vector element in the source SIMD&FP register, narrows each value to half the precision of the source element using the Round to Odd rounding mode, writes the result to a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FCVTXN Sd,Dn
+

Argument Preparation

a → Dn 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand = V[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = FPConvert(Elem[operand, e, 2*esize], FPCR, FPRounding_ODD);
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Floating-point Convert to lower precision Narrow, rounding to odd (vector). This instruction reads each vector element in the source SIMD&FP register, narrows each value to half the precision of the source element using the Round to Odd rounding mode, writes the result to a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FCVTXN2 Vd.4S,Vn.2D
+

Argument Preparation

r → Vd.2S 
+a → Vn.2D

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand = V[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = FPConvert(Elem[operand, e, 2*esize], FPCR, FPRounding_ODD);
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Floating-point Round to Integral, toward Zero (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values of the same size using the Round towards Zero rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FRINTZ Vd.2S,Vn.2S
+

Argument Preparation

a → Vn.2S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPRoundInt(element, FPCR, rounding, exact);
+
+V[d] = result;
+

Supported architectures

A32/A64
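
The FRINT* entries that follow differ only in the rounding mode handed to FPRoundInt. As a rough per-lane model in scalar Rust (an illustrative sketch; round_ties_even assumes a recent toolchain):

// Per-lane scalar models of the FRINT* rounding modes (illustrative only).
fn frintz(x: f32) -> f32 { x.trunc() }           // toward zero
fn frintm(x: f32) -> f32 { x.floor() }           // toward minus infinity
fn frintp(x: f32) -> f32 { x.ceil() }            // toward plus infinity
fn frinta(x: f32) -> f32 { x.round() }           // nearest, ties away from zero
fn frintn(x: f32) -> f32 { x.round_ties_even() } // nearest, ties to even
// FRINTI and FRINTX instead follow the rounding mode currently set in the
// FPCR; FRINTX additionally raises Inexact when the result differs from the
// input.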

Description

Floating-point Round to Integral, toward Zero (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values of the same size using the Round towards Zero rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FRINTZ Vd.4S,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPRoundInt(element, FPCR, rounding, exact);
+
+V[d] = result;
+

Supported architectures

A32/A64

Description

Floating-point Round to Integral, toward Zero (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values of the same size using the Round towards Zero rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FRINTZ Dd,Dn
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPRoundInt(element, FPCR, rounding, exact);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Round to Integral, toward Zero (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values of the same size using the Round towards Zero rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FRINTZ Vd.2D,Vn.2D
+

Argument Preparation

a → Vn.2D 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPRoundInt(element, FPCR, rounding, exact);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Round to Integral, to nearest with ties to even (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values of the same size using the Round to Nearest rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FRINTN Vd.2S,Vn.2S
+

Argument Preparation

a → Vn.2S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPRoundInt(element, FPCR, rounding, exact);
+
+V[d] = result;
+

Supported architectures

A32/A64

Description

Floating-point Round to Integral, to nearest with ties to even (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values of the same size using the Round to Nearest rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FRINTN Vd.4S,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPRoundInt(element, FPCR, rounding, exact);
+
+V[d] = result;
+

Supported architectures

A32/A64

Description

Floating-point Round to Integral, to nearest with ties to even (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values of the same size using the Round to Nearest rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FRINTN Dd,Dn
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPRoundInt(element, FPCR, rounding, exact);
+
+V[d] = result;
+

Supported architectures

A32/A64

Description

Floating-point Round to Integral, to nearest with ties to even (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values of the same size using the Round to Nearest rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FRINTN Vd.2D,Vn.2D
+

Argument Preparation

a → Vn.2D 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPRoundInt(element, FPCR, rounding, exact);
+
+V[d] = result;
+

Supported architectures

A32/A64

Description

Floating-point Round to Integral, to nearest with ties to even (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values of the same size using the Round to Nearest rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FRINTN Sd,Sn
+

Argument Preparation

a → Sn 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPRoundInt(element, FPCR, rounding, exact);
+
+V[d] = result;
+

Supported architectures

A32/A64

Description

Floating-point Round to Integral, toward Minus infinity (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values of the same size using the Round towards Minus Infinity rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FRINTM Vd.2S,Vn.2S
+

Argument Preparation

a → Vn.2S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPRoundInt(element, FPCR, rounding, exact);
+
+V[d] = result;
+

Supported architectures

A32/A64

Description

Floating-point Round to Integral, toward Minus infinity (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values of the same size using the Round towards Minus Infinity rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FRINTM Vd.4S,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPRoundInt(element, FPCR, rounding, exact);
+
+V[d] = result;
+

Supported architectures

A32/A64

Description

Floating-point Round to Integral, toward Minus infinity (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values of the same size using the Round towards Minus Infinity rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FRINTM Dd,Dn
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPRoundInt(element, FPCR, rounding, exact);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Round to Integral, toward Minus infinity (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values of the same size using the Round towards Minus Infinity rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FRINTM Vd.2D,Vn.2D
+

Argument Preparation

a → Vn.2D 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPRoundInt(element, FPCR, rounding, exact);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Round to Integral, toward Plus infinity (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values of the same size using the Round towards Plus Infinity rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FRINTP Vd.2S,Vn.2S
+

Argument Preparation

a → Vn.2S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPRoundInt(element, FPCR, rounding, exact);
+
+V[d] = result;
+

Supported architectures

A32/A64

Description

Floating-point Round to Integral, toward Plus infinity (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values of the same size using the Round towards Plus Infinity rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FRINTP Vd.4S,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPRoundInt(element, FPCR, rounding, exact);
+
+V[d] = result;
+

Supported architectures

A32/A64

Description

Floating-point Round to Integral, toward Plus infinity (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values of the same size using the Round towards Plus Infinity rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FRINTP Dd,Dn
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPRoundInt(element, FPCR, rounding, exact);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Round to Integral, toward Plus infinity (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values of the same size using the Round towards Plus Infinity rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FRINTP Vd.2D,Vn.2D
+

Argument Preparation

a → Vn.2D 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPRoundInt(element, FPCR, rounding, exact);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Round to Integral, to nearest with ties to Away (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values of the same size using the Round to Nearest with Ties to Away rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FRINTA Vd.2S,Vn.2S
+

Argument Preparation

a → Vn.2S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPRoundInt(element, FPCR, rounding, exact);
+
+V[d] = result;
+

Supported architectures

A32/A64

Description

Floating-point Round to Integral, to nearest with ties to Away (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values of the same size using the Round to Nearest with Ties to Away rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FRINTA Vd.4S,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPRoundInt(element, FPCR, rounding, exact);
+
+V[d] = result;
+

Supported architectures

A32/A64

Description

Floating-point Round to Integral, to nearest with ties to Away (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values of the same size using the Round to Nearest with Ties to Away rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FRINTA Dd,Dn
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPRoundInt(element, FPCR, rounding, exact);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Round to Integral, to nearest with ties to Away (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values of the same size using the Round to Nearest with Ties to Away rounding mode, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FRINTA Vd.2D,Vn.2D
+

Argument Preparation

a → Vn.2D 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPRoundInt(element, FPCR, rounding, exact);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Round to Integral, using current rounding mode (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values of the same size using the rounding mode that is determined by the FPCR, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FRINTI Vd.2S,Vn.2S
+

Argument Preparation

a → Vn.2S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPRoundInt(element, FPCR, rounding, exact);
+
+V[d] = result;
+

Supported architectures

A32/A64

Description

Floating-point Round to Integral, using current rounding mode (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values of the same size using the rounding mode that is determined by the FPCR, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FRINTI Vd.4S,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPRoundInt(element, FPCR, rounding, exact);
+
+V[d] = result;
+

Supported architectures

A32/A64

Description

Floating-point Round to Integral, using current rounding mode (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values of the same size using the rounding mode that is determined by the FPCR, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FRINTI Dd,Dn
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPRoundInt(element, FPCR, rounding, exact);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Round to Integral, using current rounding mode (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values of the same size using the rounding mode that is determined by the FPCR, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FRINTI Vd.2D,Vn.2D
+

Argument Preparation

a → Vn.2D 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPRoundInt(element, FPCR, rounding, exact);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Round to Integral exact, using current rounding mode (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values of the same size using the rounding mode that is determined by the FPCR, and writes the result to the SIMD&FP destination register. When a result is not numerically equal to the corresponding input value, an Inexact floating-point exception is raised; this is what distinguishes FRINTX from FRINTI.

+

A64 Instruction

FRINTX Vd.2S,Vn.2S
+

Argument Preparation

a → Vn.2S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPRoundInt(element, FPCR, rounding, exact);
+
+V[d] = result;
+

Supported architectures

A32/A64

Description

Floating-point Round to Integral exact, using current rounding mode (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values of the same size using the rounding mode that is determined by the FPCR, and writes the result to the SIMD&FP destination register. When a result is not numerically equal to the corresponding input value, an Inexact floating-point exception is raised; this is what distinguishes FRINTX from FRINTI.

+

A64 Instruction

FRINTX Vd.4S,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPRoundInt(element, FPCR, rounding, exact);
+
+V[d] = result;
+

Supported architectures

A32/A64

Description

Floating-point Round to Integral exact, using current rounding mode (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values of the same size using the rounding mode that is determined by the FPCR, and writes the result to the SIMD&FP destination register. When a result is not numerically equal to the corresponding input value, an Inexact floating-point exception is raised; this is what distinguishes FRINTX from FRINTI.

+

A64 Instruction

FRINTX Dd,Dn
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPRoundInt(element, FPCR, rounding, exact);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Round to Integral exact, using current rounding mode (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values of the same size using the rounding mode that is determined by the FPCR, and writes the result to the SIMD&FP destination register. When a result is not numerically equal to the corresponding input value, an Inexact floating-point exception is raised; this is what distinguishes FRINTX from FRINTI.

+

A64 Instruction

FRINTX Vd.2D,Vn.2D
+

Argument Preparation

a → Vn.2D 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPRoundInt(element, FPCR, rounding, exact);
+
+V[d] = result;
+

Supported architectures

A64

Description

Extract Narrow. This instruction reads each vector element from the source SIMD&FP register, narrows each value to half the original width, places the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements.

+

A64 Instruction

XTN Vd.8B,Vn.8H
+

Argument Preparation

a → Vn.8H 

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand = V[n];
+bits(datasize) result;
+bits(2*esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, 2*esize];
+    Elem[result, e, esize] = element<esize-1:0>;
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64
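
Assuming the vmovn_s16 binding is available in core::arch::aarch64, the truncating narrow behaves exactly like a scalar as-cast to the smaller integer type:

#[cfg(target_arch = "aarch64")]
unsafe fn demo_xtn() {
    use core::arch::aarch64::*;
    // XTN Vd.8B,Vn.8H: each 16-bit lane keeps only its low 8 bits,
    // just as 0x1FF_i16 as i8 == -1 in scalar Rust.
    let wide: int16x8_t = vdupq_n_s16(0x1FF);
    let narrow: int8x8_t = vmovn_s16(wide);
    assert_eq!(vget_lane_s8::<0>(narrow), -1);
}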

Description

Extract Narrow. This instruction reads each vector element from the source SIMD&FP register, narrows each value to half the original width, places the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements.

+

A64 Instruction

XTN Vd.4H,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand = V[n];
+bits(datasize) result;
+bits(2*esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, 2*esize];
+    Elem[result, e, esize] = element<esize-1:0>;
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Extract Narrow. This instruction reads each vector element from the source SIMD&FP register, narrows each value to half the original width, places the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements.

+

A64 Instruction

XTN Vd.2S,Vn.2D
+

Argument Preparation

a → Vn.2D 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand = V[n];
+bits(datasize) result;
+bits(2*esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, 2*esize];
+    Elem[result, e, esize] = element<esize-1:0>;
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Extract Narrow. This instruction reads each vector element from the source SIMD&FP register, narrows each value to half the original width, places the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements.

+

A64 Instruction

XTN Vd.8B,Vn.8H
+

Argument Preparation

a → Vn.8H 

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand = V[n];
+bits(datasize) result;
+bits(2*esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, 2*esize];
+    Elem[result, e, esize] = element<esize-1:0>;
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Extract Narrow. This instruction reads each vector element from the source SIMD&FP register, narrows each value to half the original width, places the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements.

+

A64 Instruction

XTN Vd.4H,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand = V[n];
+bits(datasize) result;
+bits(2*esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, 2*esize];
+    Elem[result, e, esize] = element<esize-1:0>;
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Extract Narrow. This instruction reads each vector element from the source SIMD&FP register, narrows each value to half the original width, places the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements.

+

A64 Instruction

XTN Vd.2S,Vn.2D
+

Argument Preparation

a → Vn.2D 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand = V[n];
+bits(datasize) result;
+bits(2*esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, 2*esize];
+    Elem[result, e, esize] = element<esize-1:0>;
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Extract Narrow. This instruction reads each vector element from the source SIMD&FP register, narrows each value to half the original width, places the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements.

+

A64 Instruction

XTN2 Vd.16B,Vn.8H
+

Argument Preparation

r → Vd.8B 
+a → Vn.8H

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand = V[n];
+bits(datasize) result;
+bits(2*esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, 2*esize];
+    Elem[result, e, esize] = element<esize-1:0>;
+Vpart[d, part] = result;
+

Supported architectures

A64
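
The 2-suffixed form writes into the upper half of the destination while preserving its lower half (the r argument above). A hedged sketch, assuming the vmovn_high_s16 binding exists:

#[cfg(target_arch = "aarch64")]
unsafe fn demo_xtn2() {
    use core::arch::aarch64::*;
    // XTN2 Vd.16B,Vn.8H: the lower 8 bytes are kept from low, the upper
    // 8 bytes are the newly narrowed lanes of wide.
    let low: int8x8_t = vdup_n_s8(7);
    let wide: int16x8_t = vdupq_n_s16(-2);
    let combined: int8x16_t = vmovn_high_s16(low, wide);
    assert_eq!(vgetq_lane_s8::<0>(combined), 7);   // preserved lower half
    assert_eq!(vgetq_lane_s8::<15>(combined), -2); // narrowed upper half
}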

Description

Extract Narrow. This instruction reads each vector element from the source SIMD&FP register, narrows each value to half the original width, places the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements.

+

A64 Instruction

XTN2 Vd.8H,Vn.4S
+

Argument Preparation

r → Vd.4H 
+a → Vn.4S

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand = V[n];
+bits(datasize) result;
+bits(2*esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, 2*esize];
+    Elem[result, e, esize] = element<esize-1:0>;
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Extract Narrow. This instruction reads each vector element from the source SIMD&FP register, narrows each value to half the original width, places the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements.

+

A64 Instruction

XTN2 Vd.4S,Vn.2D
+

Argument Preparation

r → Vd.2S 
+a → Vn.2D

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand = V[n];
+bits(datasize) result;
+bits(2*esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, 2*esize];
+    Elem[result, e, esize] = element<esize-1:0>;
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Extract Narrow. This instruction reads each vector element from the source SIMD&FP register, narrows each value to half the original width, places the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements.

+

A64 Instruction

XTN2 Vd.16B,Vn.8H
+

Argument Preparation

r → Vd.8B 
+a → Vn.8H

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand = V[n];
+bits(datasize) result;
+bits(2*esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, 2*esize];
+    Elem[result, e, esize] = element<esize-1:0>;
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Extract Narrow. This instruction reads each vector element from the source SIMD&FP register, narrows each value to half the original width, places the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements.

+

A64 Instruction

XTN2 Vd.8H,Vn.4S
+

Argument Preparation

r → Vd.4H 
+a → Vn.4S

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand = V[n];
+bits(datasize) result;
+bits(2*esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, 2*esize];
+    Elem[result, e, esize] = element<esize-1:0>;
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Extract Narrow. This instruction reads each vector element from the source SIMD&FP register, narrows each value to half the original width, places the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements.

+

A64 Instruction

XTN2 Vd.4S,Vn.2D
+

Argument Preparation

r → Vd.2S 
+a → Vn.2D

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand = V[n];
+bits(datasize) result;
+bits(2*esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, 2*esize];
+    Elem[result, e, esize] = element<esize-1:0>;
+Vpart[d, part] = result;
+

Supported architectures

A64
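
To make the narrowing behaviour concrete, here is a small Rust sketch of the XTN2 operation above. It is not part of the original reference; the function names and the fixed 32-bit to 16-bit lane width are chosen only for illustration.

    // Per-lane body of XTN/XTN2: keep the low half of each element.
    fn xtn_lane(x: u32) -> u16 {
        x as u16
    }

    // XTN2-style write-back: the narrowed lanes go into the upper half of
    // the destination while the existing lower half is preserved, which is
    // what the Vpart[d, part] assignment in the pseudocode expresses.
    fn xtn2(lower: [u16; 4], src: [u32; 4]) -> [u16; 8] {
        let mut d = [0u16; 8];
        d[..4].copy_from_slice(&lower);
        for (e, &x) in src.iter().enumerate() {
            d[4 + e] = xtn_lane(x);
        }
        d
    }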

Description

Signed Shift Left Long (immediate). This instruction reads each vector element from the source SIMD&FP register, left shifts each vector element by the specified shift amount, places the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are signed integer values.

+

A64 Instruction

SSHLL Vd.8H,Vn.8B,#0
+

Argument Preparation

a → Vn.8B 

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = Vpart[n, part];
+bits(datasize*2) result;
+integer element;
+
+for e = 0 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned) << shift;
+    Elem[result, e, 2*esize] = element<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Shift Left Long (immediate). This instruction reads each vector element from the source SIMD&FP register, left shifts each vector element by the specified shift amount, places the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are signed integer values.

+

A64 Instruction

SSHLL Vd.4S,Vn.4H,#0
+

Argument Preparation

a → Vn.4H 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = Vpart[n, part];
+bits(datasize*2) result;
+integer element;
+
+for e = 0 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned) << shift;
+    Elem[result, e, 2*esize] = element<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Shift Left Long (immediate). This instruction reads each vector element from the source SIMD&FP register, left shifts each vector element by the specified shift amount, places the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are signed integer values.

+

A64 Instruction

SSHLL Vd.2D,Vn.2S,#0
+

Argument Preparation

a → Vn.2S 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = Vpart[n, part];
+bits(datasize*2) result;
+integer element;
+
+for e = 0 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned) << shift;
+    Elem[result, e, 2*esize] = element<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
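
With a shift amount of #0, SSHLL reduces to sign-extending every lane to twice its width. The following Rust model is illustrative only; the function name and the 8-bit to 16-bit widths are ours, not from the reference.

    // SSHLL Vd.8H,Vn.8B,#0: sign-extend each lane; the immediate shift is
    // zero, so no further shift is applied.
    fn sshll_0(src: [i8; 8]) -> [i16; 8] {
        let mut d = [0i16; 8];
        for (e, &x) in src.iter().enumerate() {
            d[e] = i16::from(x);
        }
        d
    }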

Description

Unsigned Shift Left Long (immediate). This instruction reads each vector element in the lower or upper half of the source SIMD&FP register, shifts the unsigned integer value left by the specified number of bits, places the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

USHLL Vd.8H,Vn.8B,#0
+

Argument Preparation

a → Vn.8B 

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = Vpart[n, part];
+bits(datasize*2) result;
+integer element;
+
+for e = 0 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned) << shift;
+    Elem[result, e, 2*esize] = element<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Shift Left Long (immediate). This instruction reads each vector element in the lower or upper half of the source SIMD&FP register, shifts the unsigned integer value left by the specified number of bits, places the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

USHLL Vd.4S,Vn.4H,#0
+

Argument Preparation

a → Vn.4H 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = Vpart[n, part];
+bits(datasize*2) result;
+integer element;
+
+for e = 0 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned) << shift;
+    Elem[result, e, 2*esize] = element<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Shift Left Long (immediate). This instruction reads each vector element in the lower or upper half of the source SIMD&FP register, shifts the unsigned integer value left by the specified number of bits, places the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

USHLL Vd.2D,Vn.2S,#0
+

Argument Preparation

a → Vn.2S 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = Vpart[n, part];
+bits(datasize*2) result;
+integer element;
+
+for e = 0 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned) << shift;
+    Elem[result, e, 2*esize] = element<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
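
For comparison, a similar sketch of the unsigned form: USHLL with #0 is simply a zero-extension of each lane. Again, the function name and widths are invented for the example.

    // USHLL Vd.8H,Vn.8B,#0: zero-extend each unsigned lane; the #0
    // immediate means no additional left shift.
    fn ushll_0(src: [u8; 8]) -> [u16; 8] {
        let mut d = [0u16; 8];
        for (e, &x) in src.iter().enumerate() {
            d[e] = u16::from(x);
        }
        d
    }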

Description

Signed Shift Left Long (immediate). This instruction reads each vector element from the source SIMD&FP register, left shifts each vector element by the specified shift amount, places the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are signed integer values.

+

A64 Instruction

SSHLL2 Vd.8H,Vn.16B,#0
+

Argument Preparation

a → Vn.16B 

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = Vpart[n, part];
+bits(datasize*2) result;
+integer element;
+
+for e = 0 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned) << shift;
+    Elem[result, e, 2*esize] = element<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Shift Left Long (immediate). This instruction reads each vector element from the source SIMD&FP register, left shifts each vector element by the specified shift amount, places the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are signed integer values.

+

A64 Instruction

SSHLL2 Vd.4S,Vn.8H,#0
+

Argument Preparation

a → Vn.8H 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = Vpart[n, part];
+bits(datasize*2) result;
+integer element;
+
+for e = 0 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned) << shift;
+    Elem[result, e, 2*esize] = element<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Shift Left Long (immediate). This instruction reads each vector element from the source SIMD&FP register, left shifts each vector element by the specified shift amount, places the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are signed integer values.

+

A64 Instruction

SSHLL2 Vd.2D,Vn.4S,#0
+

Argument Preparation

a → Vn.4S 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = Vpart[n, part];
+bits(datasize*2) result;
+integer element;
+
+for e = 0 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned) << shift;
+    Elem[result, e, 2*esize] = element<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64
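
The "2" variants read the upper half of a 128-bit source, which is what the Vpart[n, part] read in the pseudocode expresses. A rough Rust model of SSHLL2 with a zero shift, with names chosen for the sketch:

    // SSHLL2 Vd.8H,Vn.16B,#0: take the upper eight lanes of the source and
    // sign-extend each of them to 16 bits.
    fn sshll2_0(src: [i8; 16]) -> [i16; 8] {
        let mut d = [0i16; 8];
        for e in 0..8 {
            d[e] = i16::from(src[8 + e]);
        }
        d
    }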

Description

Unsigned Shift Left Long (immediate). This instruction reads each vector element in the lower or upper half of the source SIMD&FP register, shifts the unsigned integer value left by the specified number of bits, places the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

USHLL2 Vd.8H,Vn.16B,#0
+

Argument Preparation

a → Vn.16B 

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = Vpart[n, part];
+bits(datasize*2) result;
+integer element;
+
+for e = 0 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned) << shift;
+    Elem[result, e, 2*esize] = element<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Shift Left Long (immediate). This instruction reads each vector element in the lower or upper half of the source SIMD&FP register, shifts the unsigned integer value left by the specified number of bits, places the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

USHLL2 Vd.4S,Vn.8H,#0
+

Argument Preparation

a → Vn.8H 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = Vpart[n, part];
+bits(datasize*2) result;
+integer element;
+
+for e = 0 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned) << shift;
+    Elem[result, e, 2*esize] = element<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Shift Left Long (immediate). This instruction reads each vector element in the lower or upper half of the source SIMD&FP register, shifts the unsigned integer value left by the specified number of bits, places the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

USHLL2 Vd.2D,Vn.4S,#0
+

Argument Preparation

a → Vn.4S 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = Vpart[n, part];
+bits(datasize*2) result;
+integer element;
+
+for e = 0 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned) << shift;
+    Elem[result, e, 2*esize] = element<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64
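
And the unsigned counterpart, again only as an illustrative sketch:

    // USHLL2 Vd.8H,Vn.16B,#0: zero-extend the upper eight lanes of the
    // 128-bit source into eight 16-bit lanes.
    fn ushll2_0(src: [u8; 16]) -> [u16; 8] {
        let mut d = [0u16; 8];
        for e in 0..8 {
            d[e] = u16::from(src[8 + e]);
        }
        d
    }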

Description

Signed saturating extract Narrow. This instruction reads each vector element from the source SIMD&FP register, saturates the value to half the original width, places the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements. All the values in this instruction are signed integer values.

+

A64 Instruction

SQXTN Vd.8B,Vn.8H
+

Argument Preparation

a → Vn.8H 

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand = V[n];
+bits(datasize) result;
+bits(2*esize) element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, 2*esize];
+    (Elem[result, e, esize], sat) = SatQ(Int(element, unsigned), esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating extract Narrow. This instruction reads each vector element from the source SIMD&FP register, saturates the value to half the original width, places the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements. All the values in this instruction are signed integer values.

+

A64 Instruction

SQXTN Vd.4H,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand = V[n];
+bits(datasize) result;
+bits(2*esize) element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, 2*esize];
+    (Elem[result, e, esize], sat) = SatQ(Int(element, unsigned), esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating extract Narrow. This instruction reads each vector element from the source SIMD&FP register, saturates the value to half the original width, places the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements. All the values in this instruction are signed integer values.

+

A64 Instruction

SQXTN Vd.2S,Vn.2D
+

Argument Preparation

a → Vn.2D 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand = V[n];
+bits(datasize) result;
+bits(2*esize) element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, 2*esize];
+    (Elem[result, e, esize], sat) = SatQ(Int(element, unsigned), esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64
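
The saturating behaviour is easiest to see in scalar code. Below is a hedged Rust model of a single SQXTN lane (32 bits down to 16); the qc flag stands in for FPSR.QC and the names are ours.

    // One SQXTN lane: clamp a signed 32-bit value into the signed 16-bit
    // range and record whether saturation occurred (models FPSR.QC).
    fn sqxtn_lane(x: i32, qc: &mut bool) -> i16 {
        if x > i32::from(i16::MAX) {
            *qc = true;
            i16::MAX
        } else if x < i32::from(i16::MIN) {
            *qc = true;
            i16::MIN
        } else {
            x as i16
        }
    }

For example, an input of 40000 saturates to 32767 and sets the flag, matching the SatQ call in the pseudocode.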

Description

Unsigned saturating extract Narrow. This instruction reads each vector element from the source SIMD&FP register, saturates each value to half the original width, places the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UQXTN Vd.8B,Vn.8H
+

Argument Preparation

a → Vn.8H 

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand = V[n];
+bits(datasize) result;
+bits(2*esize) element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, 2*esize];
+    (Elem[result, e, esize], sat) = SatQ(Int(element, unsigned), esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned saturating extract Narrow. This instruction reads each vector element from the source SIMD&FP register, saturates each value to half the original width, places the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UQXTN Vd.4H,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand = V[n];
+bits(datasize) result;
+bits(2*esize) element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, 2*esize];
+    (Elem[result, e, esize], sat) = SatQ(Int(element, unsigned), esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned saturating extract Narrow. This instruction reads each vector element from the source SIMD&FP register, saturates each value to half the original width, places the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UQXTN Vd.2S,Vn.2D
+

Argument Preparation

a → Vn.2D 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand = V[n];
+bits(datasize) result;
+bits(2*esize) element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, 2*esize];
+    (Elem[result, e, esize], sat) = SatQ(Int(element, unsigned), esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64
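
The unsigned variant needs only the upper clamp; a sketch under the same assumptions as the SQXTN model above:

    // One UQXTN lane: clamp an unsigned 32-bit value to the unsigned
    // 16-bit range, setting the sticky saturation flag on overflow.
    fn uqxtn_lane(x: u32, qc: &mut bool) -> u16 {
        if x > u32::from(u16::MAX) {
            *qc = true;
            u16::MAX
        } else {
            x as u16
        }
    }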

Description

Signed saturating extract Narrow. This instruction reads each vector element from the source SIMD&FP register, saturates the value to half the original width, places the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements. All the values in this instruction are signed integer values.

+

A64 Instruction

SQXTN Bd,Hn
+

Argument Preparation

a → Hn 

Results

Bd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand = V[n];
+bits(datasize) result;
+bits(2*esize) element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, 2*esize];
+    (Elem[result, e, esize], sat) = SatQ(Int(element, unsigned), esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Signed saturating extract Narrow. This instruction reads each vector element from the source SIMD&FP register, saturates the value to half the original width, places the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements. All the values in this instruction are signed integer values.

+

A64 Instruction

SQXTN Hd,Sn
+

Argument Preparation

a → Sn 

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand = V[n];
+bits(datasize) result;
+bits(2*esize) element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, 2*esize];
+    (Elem[result, e, esize], sat) = SatQ(Int(element, unsigned), esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Signed saturating extract Narrow. This instruction reads each vector element from the source SIMD&FP register, saturates the value to half the original width, places the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements. All the values in this instruction are signed integer values.

+

A64 Instruction

SQXTN Sd,Dn
+

Argument Preparation

a → Dn 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand = V[n];
+bits(datasize) result;
+bits(2*esize) element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, 2*esize];
+    (Elem[result, e, esize], sat) = SatQ(Int(element, unsigned), esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64
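
The scalar forms (Bd,Hn / Hd,Sn / Sd,Dn) apply the same saturating narrow to a single element rather than a whole vector. A minimal Rust equivalent of the Hd,Sn case, illustrative only:

    // SQXTN Hd,Sn as a scalar operation: saturate one signed 32-bit value
    // down to signed 16 bits (saturation flagging omitted for brevity).
    fn sqxtn_scalar(x: i32) -> i16 {
        x.clamp(i32::from(i16::MIN), i32::from(i16::MAX)) as i16
    }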

Description

Unsigned saturating extract Narrow. This instruction reads each vector element from the source SIMD&FP register, saturates each value to half the original width, places the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UQXTN Bd,Hn
+

Argument Preparation

a → Hn 

Results

Bd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand = V[n];
+bits(datasize) result;
+bits(2*esize) element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, 2*esize];
+    (Elem[result, e, esize], sat) = SatQ(Int(element, unsigned), esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Unsigned saturating extract Narrow. This instruction reads each vector element from the source SIMD&FP register, saturates each value to half the original width, places the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UQXTN Hd,Sn
+

Argument Preparation

a → Sn 

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand = V[n];
+bits(datasize) result;
+bits(2*esize) element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, 2*esize];
+    (Elem[result, e, esize], sat) = SatQ(Int(element, unsigned), esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Unsigned saturating extract Narrow. This instruction reads each vector element from the source SIMD&FP register, saturates each value to half the original width, places the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UQXTN Sd,Dn
+

Argument Preparation

a → Dn 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand = V[n];
+bits(datasize) result;
+bits(2*esize) element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, 2*esize];
+    (Elem[result, e, esize], sat) = SatQ(Int(element, unsigned), esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Signed saturating extract Narrow. This instruction reads each vector element from the source SIMD&FP register, saturates the value to half the original width, places the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements. All the values in this instruction are signed integer values.

+

A64 Instruction

SQXTN2 Vd.16B,Vn.8H
+

Argument Preparation

r → Vd.8B 
+a → Vn.8H

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand = V[n];
+bits(datasize) result;
+bits(2*esize) element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, 2*esize];
+    (Elem[result, e, esize], sat) = SatQ(Int(element, unsigned), esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Signed saturating extract Narrow. This instruction reads each vector element from the source SIMD&FP register, saturates the value to half the original width, places the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements. All the values in this instruction are signed integer values.

+

A64 Instruction

SQXTN2 Vd.8H,Vn.4S
+

Argument Preparation

r → Vd.4H 
+a → Vn.4S

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand = V[n];
+bits(datasize) result;
+bits(2*esize) element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, 2*esize];
+    (Elem[result, e, esize], sat) = SatQ(Int(element, unsigned), esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Signed saturating extract Narrow. This instruction reads each vector element from the source SIMD&FP register, saturates the value to half the original width, places the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements. All the values in this instruction are signed integer values.

+

A64 Instruction

SQXTN2 Vd.4S,Vn.2D
+

Argument Preparation

r → Vd.2S 
+a → Vn.2D

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand = V[n];
+bits(datasize) result;
+bits(2*esize) element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, 2*esize];
+    (Elem[result, e, esize], sat) = SatQ(Int(element, unsigned), esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64
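
The "2" form combines the saturating narrow with the upper-half write-back shown earlier for XTN2. A hypothetical Rust sketch of SQXTN2 Vd.16B,Vn.8H:

    // SQXTN2: saturate eight 16-bit lanes down to 8 bits and place them in
    // the upper half of the destination; the lower half is kept as-is.
    fn sqxtn2(lower: [i8; 8], src: [i16; 8]) -> [i8; 16] {
        let mut d = [0i8; 16];
        d[..8].copy_from_slice(&lower);
        for (e, &x) in src.iter().enumerate() {
            d[8 + e] = x.clamp(i16::from(i8::MIN), i16::from(i8::MAX)) as i8;
        }
        d
    }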

Description

Unsigned saturating extract Narrow. This instruction reads each vector element from the source SIMD&FP register, saturates each value to half the original width, places the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UQXTN2 Vd.16B,Vn.8H
+

Argument Preparation

r → Vd.8B 
+a → Vn.8H

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand = V[n];
+bits(datasize) result;
+bits(2*esize) element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, 2*esize];
+    (Elem[result, e, esize], sat) = SatQ(Int(element, unsigned), esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Unsigned saturating extract Narrow. This instruction reads each vector element from the source SIMD&FP register, saturates each value to half the original width, places the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UQXTN2 Vd.8H,Vn.4S
+

Argument Preparation

r → Vd.4H 
+a → Vn.4S

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand = V[n];
+bits(datasize) result;
+bits(2*esize) element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, 2*esize];
+    (Elem[result, e, esize], sat) = SatQ(Int(element, unsigned), esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Unsigned saturating extract Narrow. This instruction reads each vector element from the source SIMD&FP register, saturates each value to half the original width, places the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UQXTN2 Vd.4S,Vn.2D
+

Argument Preparation

r → Vd.2S 
+a → Vn.2D

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand = V[n];
+bits(datasize) result;
+bits(2*esize) element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, 2*esize];
+    (Elem[result, e, esize], sat) = SatQ(Int(element, unsigned), esize, unsigned);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Signed saturating extract Unsigned Narrow. This instruction reads each signed integer value in the vector of the source SIMD&FP register, saturates the value to an unsigned integer value that is half the original width, places the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements.

+

A64 Instruction

SQXTUN Vd.8B,Vn.8H
+

Argument Preparation

a → Vn.8H 

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand = V[n];
+bits(datasize) result;
+bits(2*esize) element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, 2*esize];
+    (Elem[result, e, esize], sat) = UnsignedSatQ(SInt(element), esize);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating extract Unsigned Narrow. This instruction reads each signed integer value in the vector of the source SIMD&FP register, saturates the value to an unsigned integer value that is half the original width, places the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements.

+

A64 Instruction

SQXTUN Vd.4H,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand = V[n];
+bits(datasize) result;
+bits(2*esize) element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, 2*esize];
+    (Elem[result, e, esize], sat) = UnsignedSatQ(SInt(element), esize);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating extract Unsigned Narrow. This instruction reads each signed integer value in the vector of the source SIMD&FP register, saturates the value to an unsigned integer value that is half the original width, places the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements.

+

A64 Instruction

SQXTUN Vd.2S,Vn.2D
+

Argument Preparation

a → Vn.2D 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand = V[n];
+bits(datasize) result;
+bits(2*esize) element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, 2*esize];
+    (Elem[result, e, esize], sat) = UnsignedSatQ(SInt(element), esize);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

v7/A32/A64
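
SQXTUN differs from SQXTN in that the source is signed but the narrowed result is unsigned, so negative inputs clamp to zero. A minimal Rust model with invented names:

    // One SQXTUN lane: signed 16-bit input, unsigned 8-bit output.
    // Negative values saturate to 0 and large values to 255; either case
    // sets the sticky saturation flag (models FPSR.QC).
    fn sqxtun_lane(x: i16, qc: &mut bool) -> u8 {
        if x < 0 {
            *qc = true;
            0
        } else if x > i16::from(u8::MAX) {
            *qc = true;
            u8::MAX
        } else {
            x as u8
        }
    }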

Description

Signed saturating extract Unsigned Narrow. This instruction reads each signed integer value in the vector of the source SIMD&FP register, saturates the value to an unsigned integer value that is half the original width, places the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements.

+

A64 Instruction

SQXTUN Bd,Hn
+

Argument Preparation

a → Hn 

Results

Bd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand = V[n];
+bits(datasize) result;
+bits(2*esize) element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, 2*esize];
+    (Elem[result, e, esize], sat) = UnsignedSatQ(SInt(element), esize);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Signed saturating extract Unsigned Narrow. This instruction reads each signed integer value in the vector of the source SIMD&FP register, saturates the value to an unsigned integer value that is half the original width, places the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements.

+

A64 Instruction

SQXTUN Hd,Sn
+

Argument Preparation

a → Sn 

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand = V[n];
+bits(datasize) result;
+bits(2*esize) element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, 2*esize];
+    (Elem[result, e, esize], sat) = UnsignedSatQ(SInt(element), esize);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Signed saturating extract Unsigned Narrow. This instruction reads each signed integer value in the vector of the source SIMD&FP register, saturates the value to an unsigned integer value that is half the original width, places the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements.

+

A64 Instruction

SQXTUN Sd,Dn
+

Argument Preparation

a → Dn 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand = V[n];
+bits(datasize) result;
+bits(2*esize) element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, 2*esize];
+    (Elem[result, e, esize], sat) = UnsignedSatQ(SInt(element), esize);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Signed saturating extract Unsigned Narrow. This instruction reads each signed integer value in the vector of the source SIMD&FP register, saturates the value to an unsigned integer value that is half the original width, places the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements.

+

A64 Instruction

SQXTUN2 Vd.16B,Vn.8H
+

Argument Preparation

r → Vd.8B 
+a → Vn.8H

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand = V[n];
+bits(datasize) result;
+bits(2*esize) element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, 2*esize];
+    (Elem[result, e, esize], sat) = UnsignedSatQ(SInt(element), esize);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Signed saturating extract Unsigned Narrow. This instruction reads each signed integer value in the vector of the source SIMD&FP register, saturates the value to an unsigned integer value that is half the original width, places the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements.

+

A64 Instruction

SQXTUN2 Vd.8H,Vn.4S
+

Argument Preparation

r → Vd.4H 
+a → Vn.4S

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand = V[n];
+bits(datasize) result;
+bits(2*esize) element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, 2*esize];
+    (Elem[result, e, esize], sat) = UnsignedSatQ(SInt(element), esize);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Signed saturating extract Unsigned Narrow. This instruction reads each signed integer value in the vector of the source SIMD&FP register, saturates the value to an unsigned integer value that is half the original width, places the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements.

+

A64 Instruction

SQXTUN2 Vd.4S,Vn.2D
+

Argument Preparation

r → Vd.2S 
+a → Vn.2D

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(2*datasize) operand = V[n];
+bits(datasize) result;
+bits(2*esize) element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, 2*esize];
+    (Elem[result, e, esize], sat) = UnsignedSatQ(SInt(element), esize);
+    if sat then FPSR.QC = '1';
+
+Vpart[d, part] = result;
+

Supported architectures

A64

Description

Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLA Vd.4H,Vn.4H,Vm.H[lane]
+

Argument Preparation

a → Vd.4H 
+b → Vn.4H
+v → Vm.4H
+0 <= lane <= 3

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLA Vd.8H,Vn.8H,Vm.H[lane]
+

Argument Preparation

a → Vd.8H 
+b → Vn.8H
+v → Vm.4H
+0 <= lane <= 3

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLA Vd.2S,Vn.2S,Vm.S[lane]
+

Argument Preparation

a → Vd.2S 
+b → Vn.2S
+v → Vm.2S
+0 <= lane <= 1

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLA Vd.4S,Vn.4S,Vm.S[lane]
+

Argument Preparation

a → Vd.4S 
+b → Vn.4S
+v → Vm.2S
+0 <= lane <= 1

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLA Vd.4H,Vn.4H,Vm.H[lane]
+

Argument Preparation

a → Vd.4H 
+b → Vn.4H
+v → Vm.4H
+0 <= lane <= 3

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLA Vd.8H,Vn.8H,Vm.H[lane]
+

Argument Preparation

a → Vd.8H 
+b → Vn.8H
+v → Vm.4H
+0 <= lane <= 3

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLA Vd.2S,Vn.2S,Vm.S[lane]
+

Argument Preparation

a → Vd.2S 
+b → Vn.2S
+v → Vm.2S
+0 <= lane <= 1

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLA Vd.4S,Vn.4S,Vm.S[lane]
+

Argument Preparation

a → Vd.4S 
+b → Vn.4S
+v → Vm.2S
+0 <= lane <= 1

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
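
Rendering the by-lane MLA pseudocode in plain Rust may help: every lane of the first source is multiplied by one broadcast lane of the second, only the low bits of each product are kept, and the result is added to the accumulator. This is a sketch only; the names and the 16-bit element size are our choice.

    // MLA Vd.4H,Vn.4H,Vm.H[lane]: d[e] += b[e] * v[lane], with the product
    // and the accumulate both truncated to the element width (wrapping
    // arithmetic), matching the <esize-1:0> slice in the pseudocode.
    fn mla_lane_i16(acc: [i16; 4], b: [i16; 4], v: [i16; 4], lane: usize) -> [i16; 4] {
        let mut d = acc;
        for e in 0..4 {
            d[e] = d[e].wrapping_add(b[e].wrapping_mul(v[lane]));
        }
        d
    }

Because only the low esize bits of each product survive, signed and unsigned interpretations give the same result, which is why the reference pseudocode can use UInt for both.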

Description

A64 Instruction

RESULT[i] = a[i] + (b[i] * v[lane]) for i = 0 to 1
+

Argument Preparation

0 <= lane <= 1

Results

N/A → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

RESULT[i] = a[i] + (b[i] * v[lane]) for i = 0 to 3
+

Argument Preparation

0 <= lane <= 1

Results

N/A → result
+

Supported architectures

v7/A32/A64

Description

Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLA Vd.4H,Vn.4H,Vm.H[lane]
+

Argument Preparation

a → Vd.4H 
+b → Vn.4H
+v → Vm.8H
+0 <= lane <= 7

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

A64

Description

Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLA Vd.8H,Vn.8H,Vm.H[lane]
+

Argument Preparation

a → Vd.8H 
+b → Vn.8H
+v → Vm.8H
+0 <= lane <= 7

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

A64

Description

Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLA Vd.2S,Vn.2S,Vm.S[lane]
+

Argument Preparation

a → Vd.2S 
+b → Vn.2S
+v → Vm.4S
+0 <= lane <= 3

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

A64

Description

Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLA Vd.4S,Vn.4S,Vm.S[lane]
+

Argument Preparation

a → Vd.4S 
+b → Vn.4S
+v → Vm.4S
+0 <= lane <= 3

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

A64

Description

Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLA Vd.4H,Vn.4H,Vm.H[lane]
+

Argument Preparation

a → Vd.4H 
+b → Vn.4H
+v → Vm.8H
+0 <= lane <= 7

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

A64

Description

Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLA Vd.8H,Vn.8H,Vm.H[lane]
+

Argument Preparation

a → Vd.8H 
+b → Vn.8H
+v → Vm.8H
+0 <= lane <= 7

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

A64

Description

Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLA Vd.2S,Vn.2S,Vm.S[lane]
+

Argument Preparation

a → Vd.2S 
+b → Vn.2S
+v → Vm.4S
+0 <= lane <= 3

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

A64

Description

Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLA Vd.4S,Vn.4S,Vm.S[lane]
+

Argument Preparation

a → Vd.4S 
+b → Vn.4S
+v → Vm.4S
+0 <= lane <= 3

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

A64

Description

A64 Instruction

RESULT[i] = a[i] + (b[i] * v[lane]) for i = 0 to 1
+

Argument Preparation

0 <= lane <= 3

Results

N/A → result
+

Supported architectures

A64

Description

A64 Instruction

RESULT[i] = a[i] + (b[i] * v[lane]) for i = 0 to 3
+

Argument Preparation

0 <= lane <= 3

Results

N/A → result
+

Supported architectures

A64

Description

Signed Multiply-Add Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SMLAL Vd.4S,Vn.4H,Vm.H[lane]
+

Argument Preparation

a → Vd.4S 
+b → Vn.4H
+v → Vm.4H
+0 <= lane <= 3

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Multiply-Add Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SMLAL Vd.2D,Vn.2S,Vm.S[lane]
+

Argument Preparation

a → Vd.2D 
+b → Vn.2S
+v → Vm.2S
+0 <= lane <= 1

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Multiply-Add Long (vector). This instruction multiplies the vector elements in the lower or upper half of the first source SIMD&FP register by the corresponding vector elements of the second source SIMD&FP register, and accumulates the results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

UMLAL Vd.4S,Vn.4H,Vm.H[lane]
+

Argument Preparation

a → Vd.4S 
+b → Vn.4H
+v → Vm.4H
+0 <= lane <= 3

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Multiply-Add Long (vector). This instruction multiplies the vector elements in the lower or upper half of the first source SIMD&FP register by the corresponding vector elements of the second source SIMD&FP register, and accumulates the results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

UMLAL Vd.2D,Vn.2S,Vm.S[lane]
+

Argument Preparation

a → Vd.2D 
+b → Vn.2S
+v → Vm.2S
+0 <= lane <= 1

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
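
The long multiply-accumulate forms widen before accumulating. A rough Rust rendering of SMLAL Vd.4S,Vn.4H,Vm.H[lane], illustrative rather than authoritative:

    // SMLAL by lane: sign-extend each 16-bit lane and the selected lane of
    // v to 32 bits, multiply, and accumulate into the 32-bit destination
    // lanes (the accumulate wraps at 32 bits, as in the pseudocode).
    fn smlal_lane_i16(acc: [i32; 4], b: [i16; 4], v: [i16; 4], lane: usize) -> [i32; 4] {
        let mut d = acc;
        for e in 0..4 {
            d[e] = d[e].wrapping_add(i32::from(b[e]) * i32::from(v[lane]));
        }
        d
    }

The unsigned UMLAL form is identical except that the widening uses zero-extension.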

Description

Signed Multiply-Add Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SMLAL2 Vd.4S,Vn.8H,Vm.H[lane]
+

Argument Preparation

a → Vd.4S 
+b → Vn.8H
+v → Vm.4H
+0 <= lane <= 3

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

A64
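
The "2" (high) forms such as SMLAL2 differ from the base forms only in that the first source is read from the upper half of a 128-bit register (Vpart[n, part] with part = 1 in the pseudocode). A minimal scalar sketch of that selection, again with an invented name and array-based signature rather than the real intrinsic API:

// Scalar sketch of SMLAL2 Vd.4S, Vn.8H, Vm.H[lane]: the high form
// accumulates the upper four elements of b (indices 4..8).
fn smlal_high_lane_s16(a: [i32; 4], b: [i16; 8], v: [i16; 4], lane: usize) -> [i32; 4] {
    assert!(lane <= 3); // 0 <= lane <= 3
    let mut result = a;
    for e in 0..4 {
        let product = (b[4 + e] as i32).wrapping_mul(v[lane] as i32);
        result[e] = result[e].wrapping_add(product);
    }
    result
}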

Description

Signed Multiply-Add Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SMLAL2 Vd.2D,Vn.4S,Vm.S[lane]
+

Argument Preparation

a → Vd.2D 
+b → Vn.4S
+v → Vm.2S
+0 <= lane <= 1

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Multiply-Add Long (vector). This instruction multiplies the vector elements in the lower or upper half of the first source SIMD&FP register by the corresponding vector elements of the second source SIMD&FP register, and accumulates the results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

UMLAL2 Vd.4S,Vn.8H,Vm.H[lane]
+

Argument Preparation

a → Vd.4S 
+b → Vn.8H
+v → Vm.4H
+0 <= lane <= 3

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Multiply-Add Long (vector). This instruction multiplies the vector elements in the lower or upper half of the first source SIMD&FP register by the corresponding vector elements of the second source SIMD&FP register, and accumulates the results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

UMLAL2 Vd.2D,Vn.4S,Vm.S[lane]
+

Argument Preparation

a → Vd.2D 
+b → Vn.4S
+v → Vm.2S
+0 <= lane <= 1

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Multiply-Add Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SMLAL Vd.4S,Vn.4H,Vm.H[lane]
+

Argument Preparation

a → Vd.4S 
+b → Vn.4H
+v → Vm.8H
+0 <= lane <= 7

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Multiply-Add Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SMLAL Vd.2D,Vn.2S,Vm.S[lane]
+

Argument Preparation

a → Vd.2D 
+b → Vn.2S
+v → Vm.4S
+0 <= lane <= 3

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Multiply-Add Long (vector). This instruction multiplies the vector elements in the lower or upper half of the first source SIMD&FP register by the corresponding vector elements of the second source SIMD&FP register, and accumulates the results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

UMLAL Vd.4S,Vn.4H,Vm.H[lane]
+

Argument Preparation

a → Vd.4S 
+b → Vn.4H
+v → Vm.8H
+0 <= lane <= 7

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Multiply-Add Long (vector). This instruction multiplies the vector elements in the lower or upper half of the first source SIMD&FP register by the corresponding vector elements of the second source SIMD&FP register, and accumulates the results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

UMLAL Vd.2D,Vn.2S,Vm.S[lane]
+

Argument Preparation

a → Vd.2D 
+b → Vn.2S
+v → Vm.4S
+0 <= lane <= 3

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Multiply-Add Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SMLAL2 Vd.4S,Vn.8H,Vm.H[lane]
+

Argument Preparation

a → Vd.4S 
+b → Vn.8H
+v → Vm.8H
+0 <= lane <= 7

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Multiply-Add Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SMLAL2 Vd.2D,Vn.4S,Vm.S[lane]
+

Argument Preparation

a → Vd.2D 
+b → Vn.4S
+v → Vm.4S
+0 <= lane <= 3

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Multiply-Add Long (vector). This instruction multiplies the vector elements in the lower or upper half of the first source SIMD&FP register by the corresponding vector elements of the second source SIMD&FP register, and accumulates the results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

UMLAL2 Vd.4S,Vn.8H,Vm.H[lane]
+

Argument Preparation

a → Vd.4S 
+b → Vn.8H
+v → Vm.8H
+0 <= lane <= 7

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Multiply-Add Long (vector). This instruction multiplies the vector elements in the lower or upper half of the first source SIMD&FP register by the corresponding vector elements of the second source SIMD&FP register, and accumulates the results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

UMLAL2 Vd.2D,Vn.4S,Vm.S[lane]
+

Argument Preparation

a → Vd.2D 
+b → Vn.4S
+v → Vm.4S
+0 <= lane <= 3

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply-Add Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, doubles the results, and accumulates the final results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SQDMLAL Vd.4S,Vn.4H,Vm.H[lane]
+

Argument Preparation

a → Vd.4S 
+b → Vn.4H
+v → Vm.4H
+0 <= lane <= 3

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat1) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = SInt(Elem[operand3, e, 2*esize]) - SInt(product);
+    else
+        accum = SInt(Elem[operand3, e, 2*esize]) + SInt(product);
+    (Elem[result, e, 2*esize], sat2) = SignedSatQ(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
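
The saturating SQDMLAL form above can be modelled in scalar Rust as follows (an illustrative sketch under the assumption of 16-bit elements; the name, the array signature, and the returned bool standing in for the sticky FPSR.QC flag are all invented for clarity):

// Scalar sketch of SQDMLAL Vd.4S, Vn.4H, Vm.H[lane]. Both the doubled
// product and the accumulation saturate to the signed 32-bit range,
// mirroring the two SignedSatQ calls in the pseudocode.
fn sqdmlal_lane_s16(a: [i32; 4], b: [i16; 4], v: [i16; 4], lane: usize) -> ([i32; 4], bool) {
    assert!(lane <= 3); // 0 <= lane <= 3
    let mut qc = false;
    let mut result = a;
    for e in 0..4 {
        let wide = 2 * (b[e] as i64) * (v[lane] as i64);
        let product = wide.clamp(i32::MIN as i64, i32::MAX as i64);
        qc |= product != wide;
        let sum = result[e] as i64 + product;
        let acc = sum.clamp(i32::MIN as i64, i32::MAX as i64);
        qc |= acc != sum;
        result[e] = acc as i32;
    }
    (result, qc)
}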

Description

Signed saturating Doubling Multiply-Add Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, doubles the results, and accumulates the final results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SQDMLAL Vd.2D,Vn.2S,Vm.S[lane]
+

Argument Preparation

a → Vd.2D 
+b → Vn.2S
+v → Vm.2S
+0 <= lane <= 1

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat1) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = SInt(Elem[operand3, e, 2*esize]) - SInt(product);
+    else
+        accum = SInt(Elem[operand3, e, 2*esize]) + SInt(product);
+    (Elem[result, e, 2*esize], sat2) = SignedSatQ(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Doubling Multiply-Add Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, doubles the results, and accumulates the final results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SQDMLAL Sd,Hn,Vm.H[lane]
+

Argument Preparation

a → Sd 
+b → Hn
+v → Vm.4H
+0 <= lane <= 3

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat1) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = SInt(Elem[operand3, e, 2*esize]) - SInt(product);
+    else
+        accum = SInt(Elem[operand3, e, 2*esize]) + SInt(product);
+    (Elem[result, e, 2*esize], sat2) = SignedSatQ(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply-Add Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, doubles the results, and accumulates the final results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SQDMLAL Dd,Sn,Vm.S[lane]
+

Argument Preparation

a → Dd 
+b → Sn
+v → Vm.2S
+0 <= lane <= 1

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat1) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = SInt(Elem[operand3, e, 2*esize]) - SInt(product);
+    else
+        accum = SInt(Elem[operand3, e, 2*esize]) + SInt(product);
+    (Elem[result, e, 2*esize], sat2) = SignedSatQ(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply-Add Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, doubles the results, and accumulates the final results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SQDMLAL2 Vd.4S,Vn.8H,Vm.H[lane]
+

Argument Preparation

a → Vd.4S 
+b → Vn.8H
+v → Vm.4H
+0 <= lane <= 3

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat1) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = SInt(Elem[operand3, e, 2*esize]) - SInt(product);
+    else
+        accum = SInt(Elem[operand3, e, 2*esize]) + SInt(product);
+    (Elem[result, e, 2*esize], sat2) = SignedSatQ(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply-Add Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, doubles the results, and accumulates the final results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SQDMLAL2 Vd.2D,Vn.4S,Vm.S[lane]
+

Argument Preparation

a → Vd.2D 
+b → Vn.4S
+v → Vm.2S
+0 <= lane <= 1

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat1) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = SInt(Elem[operand3, e, 2*esize]) - SInt(product);
+    else
+        accum = SInt(Elem[operand3, e, 2*esize]) + SInt(product);
+    (Elem[result, e, 2*esize], sat2) = SignedSatQ(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply-Add Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, doubles the results, and accumulates the final results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SQDMLAL Vd.4S,Vn.4H,Vm.H[lane]
+

Argument Preparation

a → Vd.4S 
+b → Vn.4H
+v → Vm.8H
+0 <= lane <= 7

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat1) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = SInt(Elem[operand3, e, 2*esize]) - SInt(product);
+    else
+        accum = SInt(Elem[operand3, e, 2*esize]) + SInt(product);
+    (Elem[result, e, 2*esize], sat2) = SignedSatQ(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply-Add Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, doubles the results, and accumulates the final results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SQDMLAL Vd.2D,Vn.2S,Vm.S[lane]
+

Argument Preparation

a → Vd.2D 
+b → Vn.2S
+v → Vm.4S
+0 <= lane <= 3

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat1) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = SInt(Elem[operand3, e, 2*esize]) - SInt(product);
+    else
+        accum = SInt(Elem[operand3, e, 2*esize]) + SInt(product);
+    (Elem[result, e, 2*esize], sat2) = SignedSatQ(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply-Add Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, doubles the results, and accumulates the final results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SQDMLAL Sd,Hn,Vm.H[lane]
+

Argument Preparation

a → Sd 
+b → Hn
+v → Vm.8H
+0 <= lane <= 7

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat1) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = SInt(Elem[operand3, e, 2*esize]) - SInt(product);
+    else
+        accum = SInt(Elem[operand3, e, 2*esize]) + SInt(product);
+    (Elem[result, e, 2*esize], sat2) = SignedSatQ(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply-Add Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, doubles the results, and accumulates the final results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SQDMLAL Dd,Sn,Vm.S[lane]
+

Argument Preparation

a → Dd 
+b → Sn
+v → Vm.4S
+0 <= lane <= 3

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat1) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = SInt(Elem[operand3, e, 2*esize]) - SInt(product);
+    else
+        accum = SInt(Elem[operand3, e, 2*esize]) + SInt(product);
+    (Elem[result, e, 2*esize], sat2) = SignedSatQ(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply-Add Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, doubles the results, and accumulates the final results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SQDMLAL2 Vd.4S,Vn.8H,Vm.H[lane]
+

Argument Preparation

a → Vd.4S 
+b → Vn.8H
+v → Vm.8H
+0 <= lane <= 7

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat1) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = SInt(Elem[operand3, e, 2*esize]) - SInt(product);
+    else
+        accum = SInt(Elem[operand3, e, 2*esize]) + SInt(product);
+    (Elem[result, e, 2*esize], sat2) = SignedSatQ(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply-Add Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, doubles the results, and accumulates the final results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SQDMLAL2 Vd.2D,Vn.4S,Vm.S[lane]
+

Argument Preparation

a → Vd.2D 
+b → Vn.4S
+v → Vm.4S
+0 <= lane <= 3

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat1) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = SInt(Elem[operand3, e, 2*esize]) - SInt(product);
+    else
+        accum = SInt(Elem[operand3, e, 2*esize]) + SInt(product);
+    (Elem[result, e, 2*esize], sat2) = SignedSatQ(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLS Vd.4H,Vn.4H,Vm.H[lane]
+

Argument Preparation

a → Vd.4H 
+b → Vn.4H
+v → Vm.4H
+0 <= lane <= 3

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
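
The non-widening MLS form above keeps element and accumulator widths equal and wraps the low esize bits of the product. A minimal scalar sketch, assuming 16-bit elements (the function name and array signature are invented, not the real intrinsic API):

// Scalar sketch of MLS Vd.4H, Vn.4H, Vm.H[lane]: multiply each element of b
// by the selected lane of v and subtract the low 16 bits of the product
// from the accumulator, wrapping modulo 2^16.
fn mls_lane_u16(a: [u16; 4], b: [u16; 4], v: [u16; 4], lane: usize) -> [u16; 4] {
    assert!(lane <= 3); // 0 <= lane <= 3
    let mut result = a;
    for e in 0..4 {
        result[e] = result[e].wrapping_sub(b[e].wrapping_mul(v[lane]));
    }
    result
}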

Description

Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLS Vd.8H,Vn.8H,Vm.H[lane]
+

Argument Preparation

a → Vd.8H 
+b → Vn.8H
+v → Vm.4H
+0 <= lane <= 3

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLS Vd.2S,Vn.2S,Vm.S[lane]
+

Argument Preparation

a → Vd.2S 
+b → Vn.2S
+v → Vm.2S
+0 <= lane <= 1

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLS Vd.4S,Vn.4S,Vm.S[lane]
+

Argument Preparation

a → Vd.4S 
+b → Vn.4S
+v → Vm.2S
+0 <= lane <= 1

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLS Vd.4H,Vn.4H,Vm.H[lane]
+

Argument Preparation

a → Vd.4H 
+b → Vn.4H
+v → Vm.4H
+0 <= lane <= 3

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLS Vd.8H,Vn.8H,Vm.H[lane]
+

Argument Preparation

a → Vd.8H 
+b → Vn.8H
+v → Vm.4H
+0 <= lane <= 3

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLS Vd.2S,Vn.2S,Vm.S[lane]
+

Argument Preparation

a → Vd.2S 
+b → Vn.2S
+v → Vm.2S
+0 <= lane <= 1

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLS Vd.4S,Vn.4S,Vm.S[lane]
+

Argument Preparation

a → Vd.4S 
+b → Vn.4S
+v → Vm.2S
+0 <= lane <= 1

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

RESULT[i] = a[i] - (b[i] * v[lane]) for i = 0 to 1
+

Argument Preparation

0 <= lane <= 1

Results

N/A → result
+

Supported architectures

v7/A32/A64
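
The floating-point by-lane variants above are given only as the RESULT[i] formula rather than a pseudocode block. A minimal scalar sketch of the two-element case, with an invented name and array-based signature:

// Scalar sketch of the two-element floating-point multiply-subtract by lane:
// RESULT[i] = a[i] - (b[i] * v[lane]) for i = 0 to 1.
fn mls_lane_f32(a: [f32; 2], b: [f32; 2], v: [f32; 2], lane: usize) -> [f32; 2] {
    assert!(lane <= 1); // 0 <= lane <= 1
    [a[0] - b[0] * v[lane], a[1] - b[1] * v[lane]]
}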

Description

A64 Instruction

RESULT[i] = a[i] - (b[i] * v[lane]) for i = 0 to 3
+

Argument Preparation

0 <= lane <= 1

Results

N/A → result
+

Supported architectures

v7/A32/A64

Description

Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLS Vd.4H,Vn.4H,Vm.H[lane]
+

Argument Preparation

a → Vd.4H 
+b → Vn.4H
+v → Vm.8H
+0 <= lane <= 7

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

A64

Description

Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLS Vd.8H,Vn.8H,Vm.H[lane]
+

Argument Preparation

a → Vd.8H 
+b → Vn.8H
+v → Vm.8H
+0 <= lane <= 7

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

A64

Description

Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLS Vd.2S,Vn.2S,Vm.S[lane]
+

Argument Preparation

a → Vd.2S 
+b → Vn.2S
+v → Vm.4S
+0 <= lane <= 3

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

A64

Description

Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLS Vd.4S,Vn.4S,Vm.S[lane]
+

Argument Preparation

a → Vd.4S 
+b → Vn.4S
+v → Vm.4S
+0 <= lane <= 3

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

A64

Description

Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLS Vd.4H,Vn.4H,Vm.H[lane]
+

Argument Preparation

a → Vd.4H 
+b → Vn.4H
+v → Vm.8H
+0 <= lane <= 7

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

A64

Description

Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLS Vd.8H,Vn.8H,Vm.H[lane]
+

Argument Preparation

a → Vd.8H 
+b → Vn.8H
+v → Vm.8H
+0 <= lane <= 7

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

A64

Description

Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLS Vd.2S,Vn.2S,Vm.S[lane]
+

Argument Preparation

a → Vd.2S 
+b → Vn.2S
+v → Vm.4S
+0 <= lane <= 3

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

A64

Description

Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLS Vd.4S,Vn.4S,Vm.S[lane]
+

Argument Preparation

a → Vd.4S 
+b → Vn.4S
+v → Vm.4S
+0 <= lane <= 3

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

A64

Description

A64 Instruction

RESULT[i] = a[i] - (b[i] * v[lane]) for i = 0 to 1
+

Argument Preparation

0 <= lane <= 3

Results

N/A → result
+

Supported architectures

A64

Description

A64 Instruction

RESULT[i] = a[i] - (b[i] * v[lane]) for i = 0 to 3
+

Argument Preparation

0 <= lane <= 3

Results

N/A → result
+

Supported architectures

A64

Description

Signed Multiply-Subtract Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SMLSL Vd.4S,Vn.4H,Vm.H[lane]
+

Argument Preparation

a → Vd.4S 
+b → Vn.4H
+v → Vm.4H
+0 <= lane <= 3

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
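
The widening multiply-subtract long form above is the sub_op counterpart of the SMLAL/UMLAL pseudocode: the doubled-width product is subtracted from, rather than added to, the accumulator. A minimal scalar sketch, assuming signed 16-bit elements and a 32-bit accumulator (invented name and array signature, not the real intrinsic API):

// Scalar sketch of SMLSL Vd.4S, Vn.4H, Vm.H[lane]: widen, multiply by the
// selected lane, and subtract from the 32-bit accumulator, wrapping.
fn smlsl_lane_s16(a: [i32; 4], b: [i16; 4], v: [i16; 4], lane: usize) -> [i32; 4] {
    assert!(lane <= 3); // 0 <= lane <= 3
    let mut result = a;
    for e in 0..4 {
        let product = (b[e] as i32).wrapping_mul(v[lane] as i32);
        result[e] = result[e].wrapping_sub(product);
    }
    result
}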

Description

Signed Multiply-Subtract Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SMLSL Vd.2D,Vn.2S,Vm.S[lane]
+

Argument Preparation

a → Vd.2D 
+b → Vn.2S
+v → Vm.2S
+0 <= lane <= 1

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Multiply-Subtract Long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UMLSL Vd.4S,Vn.4H,Vm.H[lane]
+

Argument Preparation

a → Vd.4S 
+b → Vn.4H
+v → Vm.4H
+0 <= lane <= 3

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Multiply-Subtract Long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UMLSL Vd.2D,Vn.2S,Vm.S[lane]
+

Argument Preparation

a → Vd.2D 
+b → Vn.2S
+v → Vm.2S
+0 <= lane <= 1

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Multiply-Subtract Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SMLSL2 Vd.4S,Vn.8H,Vm.H[lane]
+

Argument Preparation

a → Vd.4S 
+b → Vn.8H
+v → Vm.4H
+0 <= lane <= 3

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Multiply-Subtract Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SMLSL2 Vd.2D,Vn.4S,Vm.S[lane]
+

Argument Preparation

a → Vd.2D 
+b → Vn.4S
+v → Vm.2S
+0 <= lane <= 1

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Multiply-Subtract Long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UMLSL2 Vd.4S,Vn.8H,Vm.H[lane]
+

Argument Preparation

a → Vd.4S 
+b → Vn.8H
+v → Vm.4H
+0 <= lane <= 3

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Multiply-Subtract Long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UMLSL2 Vd.2D,Vn.4S,Vm.S[lane]
+

Argument Preparation

a → Vd.2D 
+b → Vn.4S
+v → Vm.2S
+0 <= lane <= 1

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Multiply-Subtract Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SMLSL Vd.4S,Vn.4H,Vm.H[lane]
+

Argument Preparation

a → Vd.4S 
+b → Vn.4H
+v → Vm.8H
+0 <= lane <= 7

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

A64
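
The SMLSL Vd.4S,Vn.4H,Vm.H[lane] form with a 128-bit index vector (v → Vm.8H) looks like the ACLE intrinsic vmlsl_laneq_s16; a minimal sketch under that assumption:

#include <arm_neon.h>

/* acc[i] -= (int32_t)b[i] * (int32_t)v[5]  -- SMLSL Vd.4S,Vn.4H,Vm.H[5] */
int32x4_t smlsl_by_laneq_example(int32x4_t acc, int16x4_t b, int16x8_t v) {
    return vmlsl_laneq_s16(acc, b, v, 5); /* lane must be a constant in 0..7 */
}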

Description

Signed Multiply-Subtract Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SMLSL Vd.2D,Vn.2S,Vm.S[lane]
+

Argument Preparation

a → Vd.2D 
+b → Vn.2S
+v → Vm.4S
+0 <= lane <= 3

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Multiply-Subtract Long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UMLSL Vd.4S,Vn.4H,Vm.H[lane]
+

Argument Preparation

a → Vd.4S 
+b → Vn.4H
+v → Vm.8H
+0 <= lane <= 7

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Multiply-Subtract Long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UMLSL Vd.2D,Vn.2S,Vm.S[lane]
+

Argument Preparation

a → Vd.2D 
+b → Vn.2S
+v → Vm.4S
+0 <= lane <= 3

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Multiply-Subtract Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SMLSL2 Vd.4S,Vn.8H,Vm.H[lane]
+

Argument Preparation

a → Vd.4S 
+b → Vn.8H
+v → Vm.8H
+0 <= lane <= 7

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Multiply-Subtract Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SMLSL2 Vd.2D,Vn.4S,Vm.S[lane]
+

Argument Preparation

a → Vd.2D 
+b → Vn.4S
+v → Vm.4S
+0 <= lane <= 3

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Multiply-Subtract Long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UMLSL2 Vd.4S,Vn.8H,Vm.H[lane]
+

Argument Preparation

a → Vd.4S 
+b → Vn.8H
+v → Vm.8H
+0 <= lane <= 7

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Multiply-Subtract Long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UMLSL2 Vd.2D,Vn.4S,Vm.S[lane]
+

Argument Preparation

a → Vd.2D 
+b → Vn.4S
+v → Vm.4S
+0 <= lane <= 3

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply-Subtract Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, doubles the results, and subtracts the final results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SQDMLSL Vd.4S,Vn.4H,Vm.H[lane]
+

Argument Preparation

a → Vd.4S 
+b → Vn.4H
+v → Vm.4H
+0 <= lane <= 3

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat1) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = SInt(Elem[operand3, e, 2*esize]) - SInt(product);
+    else
+        accum = SInt(Elem[operand3, e, 2*esize]) + SInt(product);
+    (Elem[result, e, 2*esize], sat2) = SignedSatQ(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
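
If the SQDMLSL Vd.4S,Vn.4H,Vm.H[lane] entry above corresponds to the ACLE intrinsic vqdmlsl_lane_s16, the saturating behaviour in the pseudocode can be exercised from C roughly as follows (wrapper name is illustrative):

#include <arm_neon.h>

/* acc[i] = sat32(acc[i] - sat32(2 * b[i] * v[3])); saturation sets FPSR.QC */
int32x4_t sqdmlsl_by_lane_example(int32x4_t acc, int16x4_t b, int16x4_t v) {
    return vqdmlsl_lane_s16(acc, b, v, 3); /* lane must be a constant in 0..3 */
}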

Description

Signed saturating Doubling Multiply-Subtract Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, doubles the results, and subtracts the final results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SQDMLSL Vd.2D,Vn.2S,Vm.S[lane]
+

Argument Preparation

a → Vd.2D 
+b → Vn.2S
+v → Vm.2S
+0 <= lane <= 1

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat1) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = SInt(Elem[operand3, e, 2*esize]) - SInt(product);
+    else
+        accum = SInt(Elem[operand3, e, 2*esize]) + SInt(product);
+    (Elem[result, e, 2*esize], sat2) = SignedSatQ(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Doubling Multiply-Subtract Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, doubles the results, and subtracts the final results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SQDMLSL Sd,Hn,Vm.H[lane]
+

Argument Preparation

a → Sd 
+b → Hn
+v → Vm.4H
+0 <= lane <= 3

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat1) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = SInt(Elem[operand3, e, 2*esize]) - SInt(product);
+    else
+        accum = SInt(Elem[operand3, e, 2*esize]) + SInt(product);
+    (Elem[result, e, 2*esize], sat2) = SignedSatQ(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64
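
The scalar SQDMLSL Sd,Hn,Vm.H[lane] form appears to be the ACLE intrinsic vqdmlslh_lane_s16; a minimal sketch under that assumption:

#include <arm_neon.h>
#include <stdint.h>

/* acc = sat32(acc - sat32(2 * b * v[0])), one 16-bit element widened to 32 bits */
int32_t sqdmlslh_by_lane_example(int32_t acc, int16_t b, int16x4_t v) {
    return vqdmlslh_lane_s16(acc, b, v, 0); /* lane must be a constant in 0..3 */
}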

Description

Signed saturating Doubling Multiply-Subtract Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, doubles the results, and subtracts the final results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SQDMLSL Dd,Sn,Vm.S[lane]
+

Argument Preparation

a → Dd 
+b → Sn
+v → Vm.2S
+0 <= lane <= 1

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat1) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = SInt(Elem[operand3, e, 2*esize]) - SInt(product);
+    else
+        accum = SInt(Elem[operand3, e, 2*esize]) + SInt(product);
+    (Elem[result, e, 2*esize], sat2) = SignedSatQ(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply-Subtract Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, doubles the results, and subtracts the final results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SQDMLSL2 Vd.4S,Vn.8H,Vm.H[lane]
+

Argument Preparation

a → Vd.4S 
+b → Vn.8H
+v → Vm.4H
+0 <= lane <= 3

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat1) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = SInt(Elem[operand3, e, 2*esize]) - SInt(product);
+    else
+        accum = SInt(Elem[operand3, e, 2*esize]) + SInt(product);
+    (Elem[result, e, 2*esize], sat2) = SignedSatQ(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply-Subtract Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, doubles the results, and subtracts the final results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SQDMLSL2 Vd.2D,Vn.4S,Vm.S[lane]
+

Argument Preparation

a → Vd.2D 
+b → Vn.4S
+v → Vm.2S
+0 <= lane <= 1

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat1) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = SInt(Elem[operand3, e, 2*esize]) - SInt(product);
+    else
+        accum = SInt(Elem[operand3, e, 2*esize]) + SInt(product);
+    (Elem[result, e, 2*esize], sat2) = SignedSatQ(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply-Subtract Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, doubles the results, and subtracts the final results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SQDMLSL Vd.4S,Vn.4H,Vm.H[lane]
+

Argument Preparation

a → Vd.4S 
+b → Vn.4H
+v → Vm.8H
+0 <= lane <= 7

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat1) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = SInt(Elem[operand3, e, 2*esize]) - SInt(product);
+    else
+        accum = SInt(Elem[operand3, e, 2*esize]) + SInt(product);
+    (Elem[result, e, 2*esize], sat2) = SignedSatQ(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply-Subtract Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, doubles the results, and subtracts the final results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SQDMLSL Vd.2D,Vn.2S,Vm.S[lane]
+

Argument Preparation

a → Vd.2D 
+b → Vn.2S
+v → Vm.4S
+0 <= lane <= 3

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat1) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = SInt(Elem[operand3, e, 2*esize]) - SInt(product);
+    else
+        accum = SInt(Elem[operand3, e, 2*esize]) + SInt(product);
+    (Elem[result, e, 2*esize], sat2) = SignedSatQ(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply-Subtract Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, doubles the results, and subtracts the final results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SQDMLSL Sd,Hn,Vm.H[lane]
+

Argument Preparation

a → Sd 
+b → Hn
+v → Vm.8H
+0 <= lane <= 7

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat1) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = SInt(Elem[operand3, e, 2*esize]) - SInt(product);
+    else
+        accum = SInt(Elem[operand3, e, 2*esize]) + SInt(product);
+    (Elem[result, e, 2*esize], sat2) = SignedSatQ(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply-Subtract Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, doubles the results, and subtracts the final results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SQDMLSL Dd,Sn,Vm.S[lane]
+

Argument Preparation

a → Dd 
+b → Sn
+v → Vm.4S
+0 <= lane <= 3

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat1) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = SInt(Elem[operand3, e, 2*esize]) - SInt(product);
+    else
+        accum = SInt(Elem[operand3, e, 2*esize]) + SInt(product);
+    (Elem[result, e, 2*esize], sat2) = SignedSatQ(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply-Subtract Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, doubles the results, and subtracts the final results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SQDMLSL2 Vd.4S,Vn.8H,Vm.H[lane]
+

Argument Preparation

a → Vd.4S 
+b → Vn.8H
+v → Vm.8H
+0 <= lane <= 7

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat1) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = SInt(Elem[operand3, e, 2*esize]) - SInt(product);
+    else
+        accum = SInt(Elem[operand3, e, 2*esize]) + SInt(product);
+    (Elem[result, e, 2*esize], sat2) = SignedSatQ(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply-Subtract Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, doubles the results, and subtracts the final results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SQDMLSL2 Vd.2D,Vn.4S,Vm.S[lane]
+

Argument Preparation

a → Vd.2D 
+b → Vn.4S
+v → Vm.4S
+0 <= lane <= 3

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat1) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = SInt(Elem[operand3, e, 2*esize]) - SInt(product);
+    else
+        accum = SInt(Elem[operand3, e, 2*esize]) + SInt(product);
+    (Elem[result, e, 2*esize], sat2) = SignedSatQ(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

MUL Vd.4H,Vn.4H,Vm.H[0]
+

Argument Preparation

a → Vn.4H 
+b → Vm.H[0]

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if poly then
+        product = PolynomialMult(element1, element2)<esize-1:0>;
+    else
+        product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    Elem[result, e, esize] = product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
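
The MUL Vd.4H,Vn.4H,Vm.H[0] form above multiplies every element by one scalar value; assuming it corresponds to the ACLE intrinsic vmul_n_s16, a minimal C sketch:

#include <arm_neon.h>

/* result[i] = (int16_t)(a[i] * b), keeping the low 16 bits of each product */
int16x4_t mul_by_scalar_example(int16x4_t a, int16_t b) {
    return vmul_n_s16(a, b);
}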

Description

Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

MUL Vd.8H,Vn.8H,Vm.H[0]
+

Argument Preparation

a → Vn.8H 
+b → Vm.H[0]

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if poly then
+        product = PolynomialMult(element1, element2)<esize-1:0>;
+    else
+        product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    Elem[result, e, esize] = product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

MUL Vd.2S,Vn.2S,Vm.S[0]
+

Argument Preparation

a → Vn.2S 
+b → Vm.S[0]

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if poly then
+        product = PolynomialMult(element1, element2)<esize-1:0>;
+    else
+        product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    Elem[result, e, esize] = product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

MUL Vd.4S,Vn.4S,Vm.S[0]
+

Argument Preparation

a → Vn.4S 
+b → Vm.S[0]

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if poly then
+        product = PolynomialMult(element1, element2)<esize-1:0>;
+    else
+        product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    Elem[result, e, esize] = product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

MUL Vd.4H,Vn.4H,Vm.H[0]
+

Argument Preparation

a → Vn.4H 
+b → Vm.H[0]

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if poly then
+        product = PolynomialMult(element1, element2)<esize-1:0>;
+    else
+        product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    Elem[result, e, esize] = product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

MUL Vd.8H,Vn.8H,Vm.H[0]
+

Argument Preparation

a → Vn.8H 
+b → Vm.H[0]

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if poly then
+        product = PolynomialMult(element1, element2)<esize-1:0>;
+    else
+        product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    Elem[result, e, esize] = product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

MUL Vd.2S,Vn.2S,Vm.S[0]
+

Argument Preparation

a → Vn.2S 
+b → Vm.S[0]

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if poly then
+        product = PolynomialMult(element1, element2)<esize-1:0>;
+    else
+        product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    Elem[result, e, esize] = product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

MUL Vd.4S,Vn.4S,Vm.S[0]
+

Argument Preparation

a → Vn.4S 
+b → Vm.S[0]

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if poly then
+        product = PolynomialMult(element1, element2)<esize-1:0>;
+    else
+        product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    Elem[result, e, esize] = product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Floating-point Multiply (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, places the result in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMUL Vd.2S,Vn.2S,Vm.S[0]
+

Argument Preparation

a → Vn.2S 
+b → Vm.S[0]

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    Elem[result, e, esize] = FPMul(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
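
Assuming the FMUL Vd.2S,Vn.2S,Vm.S[0] entry corresponds to the ACLE intrinsic vmul_n_f32, a minimal C sketch (rounding follows FPCR, as in the pseudocode):

#include <arm_neon.h>

/* result[i] = a[i] * b for both 32-bit float lanes */
float32x2_t fmul_by_scalar_example(float32x2_t a, float b) {
    return vmul_n_f32(a, b);
}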

Description

Floating-point Multiply (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, places the result in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMUL Vd.4S,Vn.4S,Vm.S[0]
+

Argument Preparation

a → Vn.4S 
+b → Vm.S[0]

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    Elem[result, e, esize] = FPMul(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Floating-point Multiply (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, places the result in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMUL Dd,Dn,Vm.D[0]
+

Argument Preparation

a → Dn 
+b → Vm.D[0]

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    Elem[result, e, esize] = FPMul(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Multiply (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, places the result in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMUL Vd.2D,Vn.2D,Vm.D[0]
+

Argument Preparation

a → Vn.2D 
+b → Vm.D[0]

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    Elem[result, e, esize] = FPMul(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

MUL Vd.4H,Vn.4H,Vm.H[lane]
+

Argument Preparation

a → Vn.4H 
+v → Vm.4H
+0 <= lane <= 3

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if poly then
+        product = PolynomialMult(element1, element2)<esize-1:0>;
+    else
+        product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    Elem[result, e, esize] = product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
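
The by-lane MUL Vd.4H,Vn.4H,Vm.H[lane] form looks like the ACLE intrinsic vmul_lane_s16; a minimal sketch under that assumption (wrapper name is illustrative):

#include <arm_neon.h>

/* result[i] = (int16_t)(a[i] * v[2])  -- MUL Vd.4H,Vn.4H,Vm.H[2] */
int16x4_t mul_by_lane_example(int16x4_t a, int16x4_t v) {
    return vmul_lane_s16(a, v, 2); /* lane must be a constant in 0..3 */
}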

Description

Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

MUL Vd.8H,Vn.8H,Vm.H[lane]
+

Argument Preparation

a → Vn.8H 
+v → Vm.4H
+0 <= lane <= 3

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if poly then
+        product = PolynomialMult(element1, element2)<esize-1:0>;
+    else
+        product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    Elem[result, e, esize] = product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

MUL Vd.2S,Vn.2S,Vm.S[lane]
+

Argument Preparation

a → Vn.2S 
+v → Vm.2S
+0 <= lane <= 1

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if poly then
+        product = PolynomialMult(element1, element2)<esize-1:0>;
+    else
+        product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    Elem[result, e, esize] = product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

MUL Vd.4S,Vn.4S,Vm.S[lane]
+

Argument Preparation

a → Vn.4S 
+v → Vm.2S
+0 <= lane <= 1

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if poly then
+        product = PolynomialMult(element1, element2)<esize-1:0>;
+    else
+        product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    Elem[result, e, esize] = product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

MUL Vd.4H,Vn.4H,Vm.H[lane]
+

Argument Preparation

a → Vn.4H 
+v → Vm.4H
+0 <= lane <= 3

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if poly then
+        product = PolynomialMult(element1, element2)<esize-1:0>;
+    else
+        product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    Elem[result, e, esize] = product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

MUL Vd.8H,Vn.8H,Vm.H[lane]
+

Argument Preparation

a → Vn.8H 
+v → Vm.4H
+0 <= lane <= 3

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if poly then
+        product = PolynomialMult(element1, element2)<esize-1:0>;
+    else
+        product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    Elem[result, e, esize] = product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

MUL Vd.2S,Vn.2S,Vm.S[lane]
+

Argument Preparation

a → Vn.2S 
+v → Vm.2S
+0 <= lane <= 1

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if poly then
+        product = PolynomialMult(element1, element2)<esize-1:0>;
+    else
+        product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    Elem[result, e, esize] = product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

MUL Vd.4S,Vn.4S,Vm.S[lane]
+

Argument Preparation

a → Vn.4S 
+v → Vm.2S
+0 <= lane <= 1

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if poly then
+        product = PolynomialMult(element1, element2)<esize-1:0>;
+    else
+        product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    Elem[result, e, esize] = product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Floating-point Multiply (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, places the result in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMUL Vd.2S,Vn.2S,Vm.S[lane]
+

Argument Preparation

a → Vn.2S 
+v → Vm.2S
+0 <= lane <= 1

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    Elem[result, e, esize] = FPMul(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
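
Assuming the FMUL Vd.2S,Vn.2S,Vm.S[lane] entry maps to the ACLE intrinsic vmul_lane_f32, a hedged C sketch:

#include <arm_neon.h>

/* result[i] = a[i] * v[1], rounded according to FPCR */
float32x2_t fmul_by_lane_example(float32x2_t a, float32x2_t v) {
    return vmul_lane_f32(a, v, 1); /* lane must be a constant in 0..1 */
}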

Description

Floating-point Multiply (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, places the result in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMUL Vd.4S,Vn.4S,Vm.S[lane]
+

Argument Preparation

a → Vn.4S 
+v → Vm.2S
+0 <= lane <= 1

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    Elem[result, e, esize] = FPMul(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Floating-point Multiply (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, places the result in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMUL Dd,Dn,Vm.D[lane]
+

Argument Preparation

a → Dn 
+v → Vm.1D
+0 <= lane <= 0

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    Elem[result, e, esize] = FPMul(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Multiply (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, places the result in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMUL Vd.2D,Vn.2D,Vm.D[lane]
+

Argument Preparation

a → Vn.2D 
+v → Vm.1D
+0 <= lane <= 0

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    Elem[result, e, esize] = FPMul(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Multiply (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, places the result in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMUL Sd,Sn,Vm.S[lane]
+

Argument Preparation

a → Sn 
+v → Vm.2S
+0 <= lane <= 1

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    Elem[result, e, esize] = FPMul(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64
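
The scalar FMUL Sd,Sn,Vm.S[lane] form appears to be the ACLE intrinsic vmuls_lane_f32; a minimal sketch under that assumption:

#include <arm_neon.h>

/* result = a * v[1], one 32-bit float multiplied by a selected lane of v */
float fmuls_by_lane_example(float a, float32x2_t v) {
    return vmuls_lane_f32(a, v, 1); /* lane must be a constant in 0..1 */
}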

Description

Floating-point Multiply (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, places the result in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMUL Dd,Dn,Vm.D[lane]
+

Argument Preparation

a → Dn 
+v → Vm.1D
+0 <= lane <= 0

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    Elem[result, e, esize] = FPMul(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

MUL Vd.4H,Vn.4H,Vm.H[lane]
+

Argument Preparation

a → Vn.4H 
+v → Vm.8H
+0 <= lane <= 7

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if poly then
+        product = PolynomialMult(element1, element2)<esize-1:0>;
+    else
+        product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    Elem[result, e, esize] = product;
+
+V[d] = result;
+

Supported architectures

A64

Description

Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

MUL Vd.8H,Vn.8H,Vm.H[lane]
+

Argument Preparation

a → Vn.8H 
+v → Vm.8H
+0 <= lane <= 7

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if poly then
+        product = PolynomialMult(element1, element2)<esize-1:0>;
+    else
+        product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    Elem[result, e, esize] = product;
+
+V[d] = result;
+

Supported architectures

A64

Description

Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

MUL Vd.2S,Vn.2S,Vm.S[lane]
+

Argument Preparation

a → Vn.2S 
+v → Vm.4S
+0 <= lane <= 3

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if poly then
+        product = PolynomialMult(element1, element2)<esize-1:0>;
+    else
+        product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    Elem[result, e, esize] = product;
+
+V[d] = result;
+

Supported architectures

A64

Description

Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

MUL Vd.4S,Vn.4S,Vm.S[lane]
+

Argument Preparation

a → Vn.4S 
+v → Vm.4S
+0 <= lane <= 3

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if poly then
+        product = PolynomialMult(element1, element2)<esize-1:0>;
+    else
+        product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    Elem[result, e, esize] = product;
+
+V[d] = result;
+

Supported architectures

A64

Description

Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

MUL Vd.4H,Vn.4H,Vm.H[lane]
+

Argument Preparation

a → Vn.4H 
+v → Vm.8H
+0 <= lane <= 7

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if poly then
+        product = PolynomialMult(element1, element2)<esize-1:0>;
+    else
+        product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    Elem[result, e, esize] = product;
+
+V[d] = result;
+

Supported architectures

A64

Description

Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

MUL Vd.8H,Vn.8H,Vm.H[lane]
+

Argument Preparation

a → Vn.8H 
+v → Vm.8H
+0 <= lane <= 7

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if poly then
+        product = PolynomialMult(element1, element2)<esize-1:0>;
+    else
+        product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    Elem[result, e, esize] = product;
+
+V[d] = result;
+

Supported architectures

A64

Description

Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

MUL Vd.2S,Vn.2S,Vm.S[lane]
+

Argument Preparation

a → Vn.2S 
+v → Vm.4S
+0 <= lane <= 3

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if poly then
+        product = PolynomialMult(element1, element2)<esize-1:0>;
+    else
+        product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    Elem[result, e, esize] = product;
+
+V[d] = result;
+

Supported architectures

A64

Description

Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

MUL Vd.4S,Vn.4S,Vm.S[lane]
+

Argument Preparation

a → Vn.4S 
+v → Vm.4S
+0 <= lane <= 3

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if poly then
+        product = PolynomialMult(element1, element2)<esize-1:0>;
+    else
+        product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    Elem[result, e, esize] = product;
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Multiply (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, places the result in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMUL Vd.2S,Vn.2S,Vm.S[lane]
+

Argument Preparation

a → Vn.2S 
+v → Vm.4S
+0 <= lane <= 3

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    Elem[result, e, esize] = FPMul(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64
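
The floating-point form follows the same by-element pattern; a rough host-float model of FMUL Vd.2S,Vn.2S,Vm.S[lane] is sketched below. It is an approximation only: real hardware applies the FPCR rounding and flush-to-zero controls referenced by FPMul, which plain f32 arithmetic does not reproduce. The helper name is an assumption for illustration.

fn fmul_lane_f32x2(a: [f32; 2], v: [f32; 4], lane: usize) -> [f32; 2] {
    // Constraint from the instruction form: 0 <= lane <= 3 for a 4S second source.
    assert!(lane < 4);
    let b = v[lane];
    // Each element of the first source is multiplied by the selected element.
    [a[0] * b, a[1] * b]
}

fn main() {
    assert_eq!(fmul_lane_f32x2([1.5, -2.0], [0.0, 4.0, 8.0, 16.0], 1), [6.0, -8.0]);
}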

Description

Floating-point Multiply (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, places the result in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMUL Vd.4S,Vn.4S,Vm.S[lane]
+

Argument Preparation

a → Vn.4S 
+v → Vm.4S
+0 <= lane <= 3

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    Elem[result, e, esize] = FPMul(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Multiply (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, places the result in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMUL Dd,Dn,Vm.D[lane]
+

Argument Preparation

a → Dn 
+v → Vm.2D
+0 <= lane <= 1

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    Elem[result, e, esize] = FPMul(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Multiply (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, places the result in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMUL Vd.2D,Vn.2D,Vm.D[lane]
+

Argument Preparation

a → Vn.2D 
+v → Vm.2D
+0 <= lane <= 1

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    Elem[result, e, esize] = FPMul(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Multiply (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, places the result in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMUL Sd,Sn,Vm.S[lane]
+

Argument Preparation

a → Sn 
+v → Vm.4S
+0 <= lane <= 3

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    Elem[result, e, esize] = FPMul(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Multiply (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, places the result in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FMUL Dd,Dn,Vm.D[lane]
+

Argument Preparation

a → Dn 
+v → Vm.2D
+0 <= lane <= 1

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    Elem[result, e, esize] = FPMul(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Multiply Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SMULL Vd.4S,Vn.4H,Vm.H[0]
+

Argument Preparation

a → Vn.4H 
+b → Vm.H[0]

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    Elem[result, e, 2*esize] = (element1*element2)<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
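
A standalone Rust sketch of the widening multiply behind SMULL Vd.4S,Vn.4H,Vm.H[0]: each signed 16-bit element is multiplied by the selected 16-bit scalar and the full 32-bit product is kept, mirroring the Elem[result, e, 2*esize] write in the pseudocode above. The helper name is assumed for illustration.

fn smull_n_i16x4(a: [i16; 4], b: i16) -> [i32; 4] {
    let mut out = [0i32; 4];
    for (o, x) in out.iter_mut().zip(a.iter()) {
        // Widen both operands first so the product cannot wrap.
        *o = (*x as i32) * (b as i32);
    }
    out
}

fn main() {
    assert_eq!(smull_n_i16x4([30_000, -30_000, 2, 3], 3), [90_000, -90_000, 6, 9]);
}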

Description

Signed Multiply Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SMULL Vd.2D,Vn.2S,Vm.S[0]
+

Argument Preparation

a → Vn.2S 
+b → Vm.S[0]

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    Elem[result, e, 2*esize] = (element1*element2)<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Multiply long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, places the result in a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UMULL Vd.4S,Vn.4H,Vm.H[0]
+

Argument Preparation

a → Vn.4H 
+b → Vm.H[0]

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    Elem[result, e, 2*esize] = (element1*element2)<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
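
The unsigned form differs only in how the 16-bit inputs are interpreted; here is a sketch of UMULL Vd.4S,Vn.4H,Vm.H[0] (helper name assumed for illustration).

fn umull_n_u16x4(a: [u16; 4], b: u16) -> [u32; 4] {
    let mut out = [0u32; 4];
    for (o, x) in out.iter_mut().zip(a.iter()) {
        // Full 32-bit product of two unsigned 16-bit values; no truncation.
        *o = (*x as u32) * (b as u32);
    }
    out
}

fn main() {
    assert_eq!(umull_n_u16x4([65_535, 2, 3, 4], 2), [131_070, 4, 6, 8]);
}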

Description

Unsigned Multiply long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, places the result in a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UMULL Vd.2D,Vn.2S,Vm.S[0]
+

Argument Preparation

a → Vn.2S 
+b → Vm.S[0]

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    Elem[result, e, 2*esize] = (element1*element2)<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Multiply Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SMULL2 Vd.4S,Vn.8H,Vm.H[0]
+

Argument Preparation

a → Vn.8H 
+b → Vm.H[0]

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    Elem[result, e, 2*esize] = (element1*element2)<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64
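
For the "2" variants, Vpart[n, part] with part = 1 selects the upper half of the first source before the same widening multiply is applied; a sketch of SMULL2 Vd.4S,Vn.8H,Vm.H[0] follows (helper name and array shapes are assumptions for illustration).

fn smull2_n_i16x8(a: [i16; 8], b: i16) -> [i32; 4] {
    // Vpart[n, 1]: the upper four elements of the 8x16-bit source.
    let upper = &a[4..8];
    let mut out = [0i32; 4];
    for (o, x) in out.iter_mut().zip(upper.iter()) {
        *o = (*x as i32) * (b as i32);
    }
    out
}

fn main() {
    assert_eq!(smull2_n_i16x8([9, 9, 9, 9, 1, 2, 3, 4], 10), [10, 20, 30, 40]);
}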

Description

Signed Multiply Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SMULL2 Vd.2D,Vn.4S,Vm.S[0]
+

Argument Preparation

a → Vn.4S 
+b → Vm.S[0]

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    Elem[result, e, 2*esize] = (element1*element2)<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Multiply long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, places the result in a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UMULL2 Vd.4S,Vn.8H,Vm.H[0]
+

Argument Preparation

a → Vn.8H 
+b → Vm.H[0]

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    Elem[result, e, 2*esize] = (element1*element2)<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Multiply long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, places the result in a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UMULL2 Vd.2D,Vn.4S,Vm.S[0]
+

Argument Preparation

a → Vn.4S 
+b → Vm.S[0]

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    Elem[result, e, 2*esize] = (element1*element2)<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Multiply Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SMULL Vd.4S,Vn.4H,Vm.H[lane]
+

Argument Preparation

a → Vn.4H 
+v → Vm.4H
+0 <= lane <= 3

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    Elem[result, e, 2*esize] = (element1*element2)<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Multiply Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SMULL Vd.2D,Vn.2S,Vm.S[lane]
+

Argument Preparation

a → Vn.2S 
+v → Vm.2S
+0 <= lane <= 1

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    Elem[result, e, 2*esize] = (element1*element2)<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Multiply long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, places the result in a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UMULL Vd.4S,Vn.4H,Vm.H[lane]
+

Argument Preparation

a → Vn.4H 
+v → Vm.4H
+0 <= lane <= 3

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    Elem[result, e, 2*esize] = (element1*element2)<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Multiply long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, places the result in a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UMULL Vd.2D,Vn.2S,Vm.S[lane]
+

Argument Preparation

a → Vn.2S 
+v → Vm.2S
+0 <= lane <= 1

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    Elem[result, e, 2*esize] = (element1*element2)<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Multiply Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SMULL2 Vd.4S,Vn.8H,Vm.H[lane]
+

Argument Preparation

a → Vn.8H 
+v → Vm.4H
+0 <= lane <= 3

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    Elem[result, e, 2*esize] = (element1*element2)<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Multiply Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SMULL2 Vd.2D,Vn.4S,Vm.S[lane]
+

Argument Preparation

a → Vn.4S 
+v → Vm.2S
+0 <= lane <= 1

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    Elem[result, e, 2*esize] = (element1*element2)<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Multiply long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, places the result in a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UMULL2 Vd.4S,Vn.8H,Vm.H[lane]
+

Argument Preparation

a → Vn.8H 
+v → Vm.4H
+0 <= lane <= 3

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    Elem[result, e, 2*esize] = (element1*element2)<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Multiply long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, places the result in a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UMULL2 Vd.2D,Vn.4S,Vm.S[lane]
+

Argument Preparation

a → Vn.4S 
+v → Vm.2S
+0 <= lane <= 1

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    Elem[result, e, 2*esize] = (element1*element2)<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Multiply Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SMULL Vd.4S,Vn.4H,Vm.H[lane]
+

Argument Preparation

a → Vn.4H 
+v → Vm.8H
+0 <= lane <= 7

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    Elem[result, e, 2*esize] = (element1*element2)<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Multiply Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SMULL Vd.2D,Vn.2S,Vm.S[lane]
+

Argument Preparation

a → Vn.2S 
+v → Vm.4S
+0 <= lane <= 3

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    Elem[result, e, 2*esize] = (element1*element2)<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Multiply long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, places the result in a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UMULL Vd.4S,Vn.4H,Vm.H[lane]
+

Argument Preparation

a → Vn.4H 
+v → Vm.8H
+0 <= lane <= 7

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    Elem[result, e, 2*esize] = (element1*element2)<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Multiply long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, places the result in a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UMULL Vd.2D,Vn.2S,Vm.S[lane]
+

Argument Preparation

a → Vn.2S 
+v → Vm.4S
+0 <= lane <= 3

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    Elem[result, e, 2*esize] = (element1*element2)<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Multiply Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SMULL2 Vd.4S,Vn.8H,Vm.H[lane]
+

Argument Preparation

a → Vn.8H 
+v → Vm.8H
+0 <= lane <= 7

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    Elem[result, e, 2*esize] = (element1*element2)<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Multiply Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SMULL2 Vd.2D,Vn.4S,Vm.S[lane]
+

Argument Preparation

a → Vn.4S 
+v → Vm.4S
+0 <= lane <= 3

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    Elem[result, e, 2*esize] = (element1*element2)<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Multiply long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, places the result in a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UMULL2 Vd.4S,Vn.8H,Vm.H[lane]
+

Argument Preparation

a → Vn.8H 
+v → Vm.8H
+0 <= lane <= 7

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    Elem[result, e, 2*esize] = (element1*element2)<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Multiply long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, places the result in a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UMULL2 Vd.2D,Vn.4S,Vm.S[lane]
+

Argument Preparation

a → Vn.4S 
+v → Vm.4S
+0 <= lane <= 3

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    Elem[result, e, 2*esize] = (element1*element2)<2*esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply Long. This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, doubles the results, places the final results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQDMULL Vd.4S,Vn.4H,Vm.H[0]
+

Argument Preparation

a → Vn.4H 
+b → Vm.H[0]

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    Elem[result, e, 2*esize] = product;
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
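
A standalone sketch of the saturating doubling multiply long, SQDMULL Vd.4S,Vn.4H,Vm.H[0]: widen, multiply, double, then saturate to 32 bits. For 16-bit inputs the only product whose doubled value overflows is -32768 * -32768, which clamps to i32::MAX and would set FPSR.QC (modelled here as a returned flag). The helper name is assumed for illustration.

fn sqdmull_n_i16x4(a: [i16; 4], b: i16) -> ([i32; 4], bool) {
    let mut qc = false; // stands in for FPSR.QC
    let mut out = [0i32; 4];
    for (o, x) in out.iter_mut().zip(a.iter()) {
        // Compute 2 * element1 * element2 in i64 so the intermediate never wraps.
        let doubled = 2 * (*x as i64) * (b as i64);
        let sat = doubled.clamp(i32::MIN as i64, i32::MAX as i64);
        if sat != doubled {
            qc = true;
        }
        *o = sat as i32;
    }
    (out, qc)
}

fn main() {
    let (r, qc) = sqdmull_n_i16x4([i16::MIN, 1, 2, 3], i16::MIN);
    assert_eq!(r, [i32::MAX, -65_536, -131_072, -196_608]);
    assert!(qc);
}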

Description

Signed saturating Doubling Multiply Long. This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, doubles the results, places the final results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQDMULL Vd.2D,Vn.2S,Vm.S[0]
+

Argument Preparation

a → Vn.2S 
+b → Vm.S[0]

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    Elem[result, e, 2*esize] = product;
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Doubling Multiply Long. This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, doubles the results, places the final results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQDMULL2 Vd.4S,Vn.8H,Vm.H[0]
+

Argument Preparation

a → Vn.8H 
+b → Vm.H[0]

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    Elem[result, e, 2*esize] = product;
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply Long. This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, doubles the results, places the final results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQDMULL2 Vd.2D,Vn.4S,Vm.S[0]
+

Argument Preparation

a → Vn.4S 
+b → Vm.S[0]

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    Elem[result, e, 2*esize] = product;
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply Long. This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, doubles the results, places the final results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQDMULL Vd.4S,Vn.4H,Vm.H[lane]
+

Argument Preparation

a → Vn.4H 
+v → Vm.4H
+0 <= lane <= 3

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    Elem[result, e, 2*esize] = product;
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Doubling Multiply Long. This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, doubles the results, places the final results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQDMULL Vd.2D,Vn.2S,Vm.S[lane]
+

Argument Preparation

a → Vn.2S 
+v → Vm.2S
+0 <= lane <= 1

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    Elem[result, e, 2*esize] = product;
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Doubling Multiply Long. This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, doubles the results, places the final results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQDMULL Sd,Hn,Vm.H[lane]
+

Argument Preparation

a → Hn 
+v → Vm.4H
+0 <= lane <= 3

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    Elem[result, e, 2*esize] = product;
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply Long. This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, doubles the results, places the final results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQDMULL Dd,Sn,Vm.S[lane]
+

Argument Preparation

a → Sn 
+v → Vm.2S
+0 <= lane <= 1

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    Elem[result, e, 2*esize] = product;
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply Long. This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, doubles the results, places the final results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQDMULL2 Vd.4S,Vn.8H,Vm.H[lane]
+

Argument Preparation

a → Vn.8H 
+v → Vm.4H
+0 <= lane <= 3

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    Elem[result, e, 2*esize] = product;
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply Long. This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, doubles the results, places the final results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQDMULL2 Vd.2D,Vn.4S,Vm.S[lane]
+

Argument Preparation

a → Vn.4S 
+v → Vm.2S
+0 <= lane <= 1

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    Elem[result, e, 2*esize] = product;
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply Long. This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, doubles the results, places the final results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQDMULL Vd.4S,Vn.4H,Vm.H[lane]
+

Argument Preparation

a → Vn.4H 
+v → Vm.8H
+0 <= lane <= 7

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    Elem[result, e, 2*esize] = product;
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply Long. This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, doubles the results, places the final results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQDMULL Vd.2D,Vn.2S,Vm.S[lane]
+

Argument Preparation

a → Vn.2S 
+v → Vm.4S
+0 <= lane <= 3

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    Elem[result, e, 2*esize] = product;
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply Long. This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, doubles the results, places the final results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQDMULL Sd,Hn,Vm.H[lane]
+

Argument Preparation

a → Hn 
+v → Vm.8H
+0 <= lane <= 7

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    Elem[result, e, 2*esize] = product;
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply Long. This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, doubles the results, places the final results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQDMULL Dd,Sn,Vm.S[lane]
+

Argument Preparation

a → Sn 
+v → Vm.4S
+0 <= lane <= 3

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    Elem[result, e, 2*esize] = product;
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply Long. This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, doubles the results, places the final results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQDMULL2 Vd.4S,Vn.8H,Vm.H[lane]
+

Argument Preparation

a → Vn.8H 
+v → Vm.8H
+0 <= lane <= 7

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    Elem[result, e, 2*esize] = product;
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply Long. This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, doubles the results, places the final results in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQDMULL2 Vd.2D,Vn.4S,Vm.S[lane]
+

Argument Preparation

a → Vn.4S 
+v → Vm.4S
+0 <= lane <= 3

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    Elem[result, e, 2*esize] = product;
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQDMULH Vd.4H,Vn.4H,Vm.H[0]
+

Argument Preparation

a → Vn.4H 
+b → Vm.H[0]

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 << (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (Elem[result, e, esize], sat) = SignedSatQ(product >> esize, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
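
A standalone sketch of SQDMULH Vd.4H,Vn.4H,Vm.H[0]: double the widened product, keep its most significant 16 bits (product >> esize), and saturate. The rounding constant in the shared pseudocode is zero for SQDMULH; SQRDMULH adds 1 << (esize - 1) before the shift. The helper name is assumed for illustration.

fn sqdmulh_n_i16x4(a: [i16; 4], b: i16) -> [i16; 4] {
    let mut out = [0i16; 4];
    for (o, x) in out.iter_mut().zip(a.iter()) {
        // Use i64 so 2 * (-32768) * (-32768) cannot overflow the intermediate.
        let product = 2 * (*x as i64) * (b as i64);
        let high = product >> 16; // most significant half of the doubled product
        *o = high.clamp(i16::MIN as i64, i16::MAX as i64) as i16;
    }
    out
}

fn main() {
    // 2 * 16384 * 16384 = 2^29; the high half (>> 16) is 2^13 = 8192.
    assert_eq!(sqdmulh_n_i16x4([16_384, -16_384, 0, 1], 16_384), [8_192, -8_192, 0, 0]);
    // The single saturating case: both inputs at i16::MIN.
    assert_eq!(sqdmulh_n_i16x4([i16::MIN; 4], i16::MIN), [i16::MAX; 4]);
}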

Description

Signed saturating Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQDMULH Vd.8H,Vn.8H,Vm.H[0]
+

Argument Preparation

a → Vn.8H 
+b → Vm.H[0]

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 << (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (Elem[result, e, esize], sat) = SignedSatQ(product >> esize, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQDMULH Vd.2S,Vn.2S,Vm.S[0]
+

Argument Preparation

a → Vn.2S 
+b → Vm.S[0]

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 << (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (Elem[result, e, esize], sat) = SignedSatQ(product >> esize, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQDMULH Vd.4S,Vn.4S,Vm.S[0]
+

Argument Preparation

a → Vn.4S 
+b → Vm.S[0]

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 << (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (Elem[result, e, esize], sat) = SignedSatQ(product >> esize, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQDMULH Vd.4H,Vn.4H,Vm.H[lane]
+

Argument Preparation

a → Vn.4H 
+v → Vm.4H
+0 <= lane <= 3

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 << (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (Elem[result, e, esize], sat) = SignedSatQ(product >> esize, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQDMULH Vd.8H,Vn.8H,Vm.H[lane]
+

Argument Preparation

a → Vn.8H 
+v → Vm.4H
+0 <= lane <= 3

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 << (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (Elem[result, e, esize], sat) = SignedSatQ(product >> esize, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQDMULH Vd.2S,Vn.2S,Vm.S[lane]
+

Argument Preparation

a → Vn.2S 
+v → Vm.2S
+0 <= lane <= 1

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 << (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (Elem[result, e, esize], sat) = SignedSatQ(product >> esize, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQDMULH Vd.4S,Vn.4S,Vm.S[lane]
+

Argument Preparation

a → Vn.4S 
+v → Vm.2S
+0 <= lane <= 1

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 << (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (Elem[result, e, esize], sat) = SignedSatQ(product >> esize, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQDMULH Hd,Hn,Vm.H[lane]
+

Argument Preparation

a → Hn 
+v → Vm.4H
+0 <= lane <= 3

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 << (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (Elem[result, e, esize], sat) = SignedSatQ(product >> esize, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQDMULH Sd,Sn,Vm.S[lane]
+

Argument Preparation

a → Sn 
+v → Vm.2S
+0 <= lane <= 1

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 << (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (Elem[result, e, esize], sat) = SignedSatQ(product >> esize, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQDMULH Vd.4H,Vn.4H,Vm.H[lane]
+

Argument Preparation

a → Vn.4H 
+v → Vm.8H
+0 <= lane <= 7

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 << (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (Elem[result, e, esize], sat) = SignedSatQ(product >> esize, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQDMULH Vd.8H,Vn.8H,Vm.H[lane]
+

Argument Preparation

a → Vn.8H 
+v → Vm.8H
+0 <= lane <= 7

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 << (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (Elem[result, e, esize], sat) = SignedSatQ(product >> esize, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQDMULH Vd.2S,Vn.2S,Vm.S[lane]
+

Argument Preparation

a → Vn.2S 
+v → Vm.4S
+0 <= lane <= 3

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 << (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (Elem[result, e, esize], sat) = SignedSatQ(product >> esize, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQDMULH Vd.4S,Vn.4S,Vm.S[lane]
+

Argument Preparation

a → Vn.4S 
+v → Vm.4S
+0 <= lane <= 3

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 << (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (Elem[result, e, esize], sat) = SignedSatQ(product >> esize, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQDMULH Hd,Hn,Vm.H[lane]
+

Argument Preparation

a → Hn 
+v → Vm.8H
+0 <= lane <= 7

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 << (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (Elem[result, e, esize], sat) = SignedSatQ(product >> esize, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQDMULH Sd,Sn,Vm.S[lane]
+

Argument Preparation

a → Sn 
+v → Vm.4S
+0 <= lane <= 3

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 << (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (Elem[result, e, esize], sat) = SignedSatQ(product >> esize, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Rounding Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQRDMULH Vd.4H,Vn.4H,Vm.H[0]
+

Argument Preparation

a → Vn.4H 
+b → Vm.H[0]

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 << (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (Elem[result, e, esize], sat) = SignedSatQ(product >> esize, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
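
The rounding variant differs from SQDMULH only in the round_const term: 1 << (esize - 1) is added to the doubled product before the high half is taken. A minimal Rust model for 16-bit lanes, again with a hypothetical helper name, might look like:

// Minimal scalar model of one SQRDMULH element step for 16-bit lanes.
// Hypothetical helper for illustration only, not a stdarch API.
fn sqrdmulh_i16(a: i16, b: i16) -> (i16, bool) {
    let round_const = 1i64 << 15; // 1 << (esize - 1), since rounding is true
    let product = 2 * (a as i64) * (b as i64) + round_const;
    let shifted = product >> 16;  // product >> esize
    if shifted > i16::MAX as i64 {
        (i16::MAX, true)          // saturated: FPSR.QC would be set
    } else if shifted < i16::MIN as i64 {
        (i16::MIN, true)
    } else {
        (shifted as i16, false)
    }
}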

Description

Signed saturating Rounding Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQRDMULH Vd.8H,Vn.8H,Vm.H[0]
+

Argument Preparation

a → Vn.8H 
+b → Vm.H[0]

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 << (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (Elem[result, e, esize], sat) = SignedSatQ(product >> esize, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Rounding Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQRDMULH Vd.2S,Vn.2S,Vm.S[0]
+

Argument Preparation

a → Vn.2S 
+b → Vm.S[0]

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 << (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (Elem[result, e, esize], sat) = SignedSatQ(product >> esize, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Rounding Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQRDMULH Vd.4S,Vn.4S,Vm.S[0]
+

Argument Preparation

a → Vn.4S 
+b → Vm.S[0]

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 << (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (Elem[result, e, esize], sat) = SignedSatQ(product >> esize, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Rounding Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQRDMULH Vd.4H,Vn.4H,Vm.H[lane]
+

Argument Preparation

a → Vn.4H 
+v → Vm.4H
+0 <= lane <= 3

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 << (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (Elem[result, e, esize], sat) = SignedSatQ(product >> esize, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
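
For the by-lane forms, the selected element of the second source is broadcast against every lane of the first. Reusing the hypothetical sqrdmulh_i16 helper sketched earlier, a model of the Vd.4H,Vn.4H,Vm.H[lane] case could be:

// Minimal model of the by-lane form: one selected lane of `v` is combined
// with every lane of `a`. Reuses the hypothetical sqrdmulh_i16 helper above.
fn sqrdmulh_lane_i16x4(a: [i16; 4], v: [i16; 4], lane: usize) -> ([i16; 4], bool) {
    assert!(lane <= 3);                    // 0 <= lane <= 3
    let b = v[lane];
    let mut out = [0i16; 4];
    let mut qc = false;                    // sticky saturation flag (FPSR.QC)
    for e in 0..4 {
        let (r, sat) = sqrdmulh_i16(a[e], b);
        out[e] = r;
        qc |= sat;
    }
    (out, qc)
}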

Description

Signed saturating Rounding Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQRDMULH Vd.8H,Vn.8H,Vm.H[lane]
+

Argument Preparation

a → Vn.8H 
+v → Vm.4H
+0 <= lane <= 3

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 << (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (Elem[result, e, esize], sat) = SignedSatQ(product >> esize, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Rounding Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQRDMULH Vd.2S,Vn.2S,Vm.S[lane]
+

Argument Preparation

a → Vn.2S 
+v → Vm.2S
+0 <= lane <= 1

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 << (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (Elem[result, e, esize], sat) = SignedSatQ(product >> esize, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Rounding Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQRDMULH Vd.4S,Vn.4S,Vm.S[lane]
+

Argument Preparation

a → Vn.4S 
+v → Vm.2S
+0 <= lane <= 1

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 << (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (Elem[result, e, esize], sat) = SignedSatQ(product >> esize, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Rounding Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQRDMULH Hd,Hn,Vm.H[lane]
+

Argument Preparation

a → Hn 
+v → Vm.4H
+0 <= lane <= 3

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 << (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (Elem[result, e, esize], sat) = SignedSatQ(product >> esize, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Rounding Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQRDMULH Sd,Sn,Vm.S[lane]
+

Argument Preparation

a → Sn 
+v → Vm.2S
+0 <= lane <= 1

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 << (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (Elem[result, e, esize], sat) = SignedSatQ(product >> esize, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Rounding Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQRDMULH Vd.4H,Vn.4H,Vm.H[lane]
+

Argument Preparation

a → Vn.4H 
+v → Vm.8H
+0 <= lane <= 7

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 << (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (Elem[result, e, esize], sat) = SignedSatQ(product >> esize, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Rounding Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQRDMULH Vd.8H,Vn.8H,Vm.H[lane]
+

Argument Preparation

a → Vn.8H 
+v → Vm.8H
+0 <= lane <= 7

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 << (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (Elem[result, e, esize], sat) = SignedSatQ(product >> esize, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Rounding Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQRDMULH Vd.2S,Vn.2S,Vm.S[lane]
+

Argument Preparation

a → Vn.2S 
+v → Vm.4S
+0 <= lane <= 3

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 << (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (Elem[result, e, esize], sat) = SignedSatQ(product >> esize, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Rounding Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQRDMULH Vd.4S,Vn.4S,Vm.S[lane]
+

Argument Preparation

a → Vn.4S 
+v → Vm.4S
+0 <= lane <= 3

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 << (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (Elem[result, e, esize], sat) = SignedSatQ(product >> esize, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Rounding Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQRDMULH Hd,Hn,Vm.H[lane]
+

Argument Preparation

a → Hn 
+v → Vm.8H
+0 <= lane <= 7

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 << (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (Elem[result, e, esize], sat) = SignedSatQ(product >> esize, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Rounding Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SQRDMULH Sd,Sn,Vm.S[lane]
+

Argument Preparation

a → Sn 
+v → Vm.4S
+0 <= lane <= 3

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+integer round_const = if rounding then 1 << (esize - 1) else 0;
+integer element1;
+integer element2;
+integer product;
+boolean sat;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    product = (2 * element1 * element2) + round_const;
+    (Elem[result, e, esize], sat) = SignedSatQ(product >> esize, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLA Vd.4H,Vn.4H,Vm.H[0]
+

Argument Preparation

a → Vd.4H 
+b → Vn.4H
+c → Vm.H[0]

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
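
Unlike the saturating forms above, MLA truncates the product to the element size and accumulates modularly, so no saturation flag is involved. A minimal Rust model for one 16-bit lane, with a hypothetical helper name, could be:

// Minimal scalar model of one MLA element step for 16-bit lanes: the
// product is truncated to esize bits and added with wrapping arithmetic.
// The same bit pattern results for signed lanes (two's complement).
// Hypothetical helper for illustration only, not a stdarch API.
fn mla_u16(acc: u16, a: u16, b: u16) -> u16 {
    let product = (a as u32).wrapping_mul(b as u32) as u16; // (...)<esize-1:0>
    acc.wrapping_add(product)                               // operand3 + product
}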

Description

Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLA Vd.8H,Vn.8H,Vm.H[0]
+

Argument Preparation

a → Vd.8H 
+b → Vn.8H
+c → Vm.H[0]

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLA Vd.2S,Vn.2S,Vm.S[0]
+

Argument Preparation

a → Vd.2S 
+b → Vn.2S
+c → Vm.S[0]

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLA Vd.4S,Vn.4S,Vm.S[0]
+

Argument Preparation

a → Vd.4S 
+b → Vn.4S
+c → Vm.S[0]

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLA Vd.4H,Vn.4H,Vm.H[0]
+

Argument Preparation

a → Vd.4H 
+b → Vn.4H
+c → Vm.H[0]

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLA Vd.8H,Vn.8H,Vm.H[0]
+

Argument Preparation

a → Vd.8H 
+b → Vn.8H
+c → Vm.H[0]

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLA Vd.2S,Vn.2S,Vm.S[0]
+

Argument Preparation

a → Vd.2S 
+b → Vn.2S
+c → Vm.S[0]

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLA Vd.4S,Vn.4S,Vm.S[0]
+

Argument Preparation

a → Vd.4S 
+b → Vn.4S
+c → Vm.S[0]

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

RESULT[i] = a[i] + (b[i] * c) for i = 0 to 1
+

Argument Preparation

a → N/A 
+b → N/A
+c → N/A

Results

N/A → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

RESULT[i] = a[i] + (b[i] * c) for i = 0 to 3
+

Argument Preparation

a → N/A 
+b → N/A
+c → N/A

Results

N/A → result
+

Supported architectures

v7/A32/A64
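
These two entries give only the arithmetic, RESULT[i] = a[i] + (b[i] * c), without naming an element type; the sketch below assumes f32 purely for illustration, and the function name is hypothetical.

// Generic model of RESULT[i] = a[i] + (b[i] * c). The element type is not
// stated above; f32 is assumed here only to make the example concrete.
fn mla_by_scalar(a: &[f32], b: &[f32], c: f32) -> Vec<f32> {
    a.iter().zip(b).map(|(&x, &y)| x + y * c).collect()
}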

Description

Signed Multiply-Add Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SMLAL Vd.4S,Vn.4H,Vm.H[0]
+

Argument Preparation

a → Vd.4S 
+b → Vn.4H
+c → Vm.H[0]

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
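
For the long (widening) forms, the narrow inputs are sign-extended, multiplied into a double-width product, and accumulated into the double-width destination lane without saturation. A minimal Rust model for one 16-bit input lane, with a hypothetical helper name:

// Minimal scalar model of one SMLAL element step: sign-extend the 16-bit
// inputs, form the 32-bit product, and accumulate modularly into the
// 32-bit destination lane. Hypothetical helper, not a stdarch API.
fn smlal_i16(acc: i32, a: i16, b: i16) -> i32 {
    let product = (a as i32) * (b as i32); // (element1 * element2)<2*esize-1:0>
    acc.wrapping_add(product)              // Elem[operand3] + product, no saturation
}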

Description

Signed Multiply-Add Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SMLAL Vd.2D,Vn.2S,Vm.S[0]
+

Argument Preparation

a → Vd.2D 
+b → Vn.2S
+c → Vm.S[0]

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Multiply-Add Long (vector). This instruction multiplies the vector elements in the lower or upper half of the first source SIMD&FP register by the corresponding vector elements of the second source SIMD&FP register, and accumulates the results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

UMLAL Vd.4S,Vn.4H,Vm.H[0]
+

Argument Preparation

a → Vd.4S 
+b → Vn.4H
+c → Vm.H[0]

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Multiply-Add Long (vector). This instruction multiplies the vector elements in the lower or upper half of the first source SIMD&FP register by the corresponding vector elements of the second source SIMD&FP register, and accumulates the results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

UMLAL Vd.2D,Vn.2S,Vm.S[0]
+

Argument Preparation

a → Vd.2D 
+b → Vn.2S
+c → Vm.S[0]

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Multiply-Add Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SMLAL2 Vd.4S,Vn.8H,Vm.H[0]
+

Argument Preparation

a → Vd.4S 
+b → Vn.8H
+c → Vm.H[0]

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

A64
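
The "2" forms differ only in which half of the narrow source they read: Vpart[n, part] selects the upper half, so SMLAL2 Vd.4S,Vn.8H,Vm.H[0] consumes lanes 4..7 of Vn. A small model, reusing the hypothetical smlal_i16 helper sketched earlier:

// Minimal model of SMLAL2 Vd.4S,Vn.8H,Vm.H[0]: only the upper four 16-bit
// lanes of `a` take part. Reuses the hypothetical smlal_i16 helper above.
fn smlal2_i16x8(acc: [i32; 4], a: [i16; 8], b: i16) -> [i32; 4] {
    let mut out = acc;
    for e in 0..4 {
        out[e] = smlal_i16(out[e], a[4 + e], b); // upper half of Vn.8H
    }
    out
}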

Description

Signed Multiply-Add Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SMLAL2 Vd.2D,Vn.4S,Vm.S[0]
+

Argument Preparation

a → Vd.2D 
+b → Vn.4S
+c → Vm.S[0]

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Multiply-Add Long (vector). This instruction multiplies the vector elements in the lower or upper half of the first source SIMD&FP register by the corresponding vector elements of the second source SIMD&FP register, and accumulates the results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

UMLAL2 Vd.4S,Vn.8H,Vm.H[0]
+

Argument Preparation

a → Vd.4S 
+b → Vn.8H
+c → Vm.H[0]

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Multiply-Add Long (vector). This instruction multiplies the vector elements in the lower or upper half of the first source SIMD&FP register by the corresponding vector elements of the second source SIMD&FP register, and accumulates the results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

UMLAL2 Vd.2D,Vn.4S,Vm.S[0]
+

Argument Preparation

a → Vd.2D 
+b → Vn.4S
+c → Vm.S[0]

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply-Add Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, doubles the results, and accumulates the final results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SQDMLAL Vd.4S,Vn.4H,Vm.H[0]
+

Argument Preparation

a → Vd.4S 
+b → Vn.4H
+c → Vm.H[0]

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat1) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = SInt(Elem[operand3, e, 2*esize]) - SInt(product);
+    else
+        accum = SInt(Elem[operand3, e, 2*esize]) + SInt(product);
+    (Elem[result, e, 2*esize], sat2) = SignedSatQ(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
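
SQDMLAL combines the widening accumulate with two saturation points: the doubled product is saturated to the double-width element, and the accumulation is saturated again, with FPSR.QC set if either step saturates. A minimal Rust model for 16-bit inputs, using hypothetical helper names:

// Minimal scalar model of one SQDMLAL element step for 16-bit inputs.
// Hypothetical helpers for illustration only, not stdarch APIs.
fn sqdmlal_i16(acc: i32, a: i16, b: i16) -> (i32, bool) {
    let (product, sat1) = saturate_i32(2 * (a as i64) * (b as i64));
    let (result, sat2) = saturate_i32(acc as i64 + product as i64);
    (result, sat1 || sat2) // either saturation would set FPSR.QC
}

// Clamp a 64-bit value into the i32 range, reporting whether it saturated.
fn saturate_i32(x: i64) -> (i32, bool) {
    if x > i32::MAX as i64 {
        (i32::MAX, true)
    } else if x < i32::MIN as i64 {
        (i32::MIN, true)
    } else {
        (x as i32, false)
    }
}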

Description

Signed saturating Doubling Multiply-Add Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, doubles the results, and accumulates the final results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SQDMLAL Vd.2D,Vn.2S,Vm.S[0]
+

Argument Preparation

a → Vd.2D 
+b → Vn.2S
+c → Vm.S[0]

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat1) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = SInt(Elem[operand3, e, 2*esize]) - SInt(product);
+    else
+        accum = SInt(Elem[operand3, e, 2*esize]) + SInt(product);
+    (Elem[result, e, 2*esize], sat2) = SignedSatQ(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Doubling Multiply-Add Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, doubles the results, and accumulates the final results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SQDMLAL2 Vd.4S,Vn.8H,Vm.H[0]
+

Argument Preparation

a → Vd.4S 
+b → Vn.8H
+c → Vm.H[0]

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat1) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = SInt(Elem[operand3, e, 2*esize]) - SInt(product);
+    else
+        accum = SInt(Elem[operand3, e, 2*esize]) + SInt(product);
+    (Elem[result, e, 2*esize], sat2) = SignedSatQ(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply-Add Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, doubles the results, and accumulates the final results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SQDMLAL2 Vd.2D,Vn.4S,Vm.S[0]
+

Argument Preparation

a → Vd.2D 
+b → Vn.4S
+c → Vm.S[0]

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat1) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = SInt(Elem[operand3, e, 2*esize]) - SInt(product);
+    else
+        accum = SInt(Elem[operand3, e, 2*esize]) + SInt(product);
+    (Elem[result, e, 2*esize], sat2) = SignedSatQ(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLS Vd.4H,Vn.4H,Vm.H[0]
+

Argument Preparation

a → Vd.4H 
+b → Vn.4H
+c → Vm.H[0]

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
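
MLS mirrors MLA with the product subtracted from the accumulator instead of added; the arithmetic is still modular, with no saturation. A minimal Rust model for one 16-bit lane, with a hypothetical helper name:

// Minimal scalar model of one MLS element step for 16-bit lanes: the
// truncated product is subtracted with wrapping arithmetic.
// Hypothetical helper, not a stdarch API.
fn mls_u16(acc: u16, a: u16, b: u16) -> u16 {
    let product = (a as u32).wrapping_mul(b as u32) as u16; // (...)<esize-1:0>
    acc.wrapping_sub(product)                               // operand3 - product
}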

Description

Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLS Vd.8H,Vn.8H,Vm.H[0]
+

Argument Preparation

a → Vd.8H 
+b → Vn.8H
+c → Vm.H[0]

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLS Vd.2S,Vn.2S,Vm.S[0]
+

Argument Preparation

a → Vd.2S 
+b → Vn.2S
+c → Vm.S[0]

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLS Vd.4S,Vn.4S,Vm.S[0]
+

Argument Preparation

a → Vd.4S 
+b → Vn.4S
+c → Vm.S[0]

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLS Vd.4H,Vn.4H,Vm.H[0]
+

Argument Preparation

a → Vd.4H 
+b → Vn.4H
+c → Vm.H[0]

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLS Vd.8H,Vn.8H,Vm.H[0]
+

Argument Preparation

a → Vd.8H 
+b → Vn.8H
+c → Vm.H[0]

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLS Vd.2S,Vn.2S,Vm.S[0]
+

Argument Preparation

a → Vd.2S 
+b → Vn.2S
+c → Vm.S[0]

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register.

+

A64 Instruction

MLS Vd.4S,Vn.4S,Vm.S[0]
+

Argument Preparation

a → Vd.4S 
+b → Vn.4S
+c → Vm.S[0]

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+bits(esize) product;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    product = (UInt(element1)*UInt(element2))<esize-1:0>;
+    if sub_op then
+        Elem[result, e, esize] = Elem[operand3, e, esize] - product;
+    else
+        Elem[result, e, esize] = Elem[operand3, e, esize] + product;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

RESULT[i] = a[i] - (b[i] * c) for i = 0 to 1
+

Argument Preparation

a → N/A 
+b → N/A
+c → N/A

Results

N/A → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

RESULT[i] = a[i] - (b[i] * c) for i = 0 to 3
+

Argument Preparation

a → N/A 
+b → N/A
+c → N/A

Results

N/A → result
+

Supported architectures

v7/A32/A64
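
As with the multiply-add entries earlier, these two give only the arithmetic, RESULT[i] = a[i] - (b[i] * c), with no element type; f32 is assumed below purely for illustration, and the function name is hypothetical.

// Generic model of RESULT[i] = a[i] - (b[i] * c). The element type is not
// stated above; f32 is assumed here only to make the example concrete.
fn mls_by_scalar(a: &[f32], b: &[f32], c: f32) -> Vec<f32> {
    a.iter().zip(b).map(|(&x, &y)| x - y * c).collect()
}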

Description

Signed Multiply-Subtract Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SMLSL Vd.4S,Vn.4H,Vm.H[0]
+

Argument Preparation

a → Vd.4S 
+b → Vn.4H
+c → Vm.H[0]

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
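
SMLSL is the subtracting counterpart of SMLAL: the sign-extended product is subtracted from the double-width accumulator, again without saturation. A one-lane Rust model with a hypothetical helper name:

// Minimal scalar model of one SMLSL element step: subtract the widened
// product from the 32-bit accumulator with wrapping arithmetic.
// Hypothetical helper, not a stdarch API.
fn smlsl_i16(acc: i32, a: i16, b: i16) -> i32 {
    acc.wrapping_sub((a as i32) * (b as i32))
}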

Description

Signed Multiply-Subtract Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SMLSL Vd.2D,Vn.2S,Vm.S[0]
+

Argument Preparation

a → Vd.2D 
+b → Vn.2S
+c → Vm.S[0]

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Multiply-Subtract Long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UMLSL Vd.4S,Vn.4H,Vm.H[0]
+

Argument Preparation

a → Vd.4S 
+b → Vn.4H
+c → Vm.H[0]

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Multiply-Subtract Long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UMLSL Vd.2D,Vn.2S,Vm.S[0]
+

Argument Preparation

a → Vd.2D 
+b → Vn.2S
+c → Vm.S[0]

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
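
The unsigned form follows the same shape. A small hedged Rust sketch of one UMLSL lane (names are illustrative, not the actual intrinsic):

// Scalar model of one UMLSL lane: all values are unsigned, the product is
// widened before the wrapping subtraction from the accumulator.
fn umlsl_lane_u32(acc: u64, a: u32, b_lane: u32) -> u64 {
    acc.wrapping_sub(a as u64 * b_lane as u64)
}

fn main() {
    assert_eq!(umlsl_lane_u32(100, 7, 3), 79); // 100 - (7 * 3)
}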

Description

Signed Multiply-Subtract Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SMLSL2 Vd.4S,Vn.8H,Vm.H[0]
+

Argument Preparation

a → Vd.4S 
+b → Vn.8H
+c → Vm.H[0]

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Multiply-Subtract Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SMLSL2 Vd.2D,Vn.4S,Vm.S[0]
+

Argument Preparation

a → Vd.2D 
+b → Vn.4S
+c → Vm.S[0]

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

A64
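
The "2" suffix selects the upper half of the sources, which is what `part` does in the pseudocode. An illustrative Rust sketch of SMLSL2 over a 4-lane source (the array layout and names are assumptions for clarity, not the stdarch types):

// Scalar model of SMLSL2: only the upper half of `a` (lanes 2 and 3) is
// widened, multiplied by the selected element, and subtracted.
fn smlsl2_lane_s32(acc: [i64; 2], a: [i32; 4], b_lane: i32) -> [i64; 2] {
    [
        acc[0].wrapping_sub(a[2] as i64 * b_lane as i64),
        acc[1].wrapping_sub(a[3] as i64 * b_lane as i64),
    ]
}

fn main() {
    assert_eq!(smlsl2_lane_s32([10, 20], [1, 2, 3, 4], 2), [4, 12]);
}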

Description

Unsigned Multiply-Subtract Long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UMLSL2 Vd.4S,Vn.8H,Vm.H[0]
+

Argument Preparation

a → Vd.4S 
+b → Vn.8H
+c → Vm.H[0]

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Multiply-Subtract Long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UMLSL2 Vd.2D,Vn.4S,Vm.S[0]
+

Argument Preparation

a → Vd.2D 
+b → Vn.4S
+c → Vm.S[0]

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+bits(2*esize) accum;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[operand1, e, esize], unsigned);
+    element2 = Int(Elem[operand2, e, esize], unsigned);
+    product = (element1*element2)<2*esize-1:0>;
+    if sub_op then
+        accum = Elem[operand3, e, 2*esize] - product;
+    else
+        accum = Elem[operand3, e, 2*esize] + product;
+    Elem[result, e, 2*esize] = accum;
+
+V[d] = result;
+

Supported architectures

A64
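
UMLSL2 is the unsigned counterpart of the sketch above; the only change is the element type. A hedged Rust sketch under the same assumed layout:

// Scalar model of UMLSL2: upper half of `a`, unsigned widening multiply,
// wrapping subtraction from the 64-bit accumulators.
fn umlsl2_lane_u32(acc: [u64; 2], a: [u32; 4], b_lane: u32) -> [u64; 2] {
    [
        acc[0].wrapping_sub(a[2] as u64 * b_lane as u64),
        acc[1].wrapping_sub(a[3] as u64 * b_lane as u64),
    ]
}

fn main() {
    assert_eq!(umlsl2_lane_u32([10, 20], [1, 2, 3, 4], 2), [4, 12]);
}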

Description

Signed saturating Doubling Multiply-Subtract Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, doubles the results, and subtracts the final results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SQDMLSL Vd.4S,Vn.4H,Vm.H[0]
+

Argument Preparation

a → Vd.4S 
+b → Vn.4H
+c → Vm.H[0]

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat1) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = SInt(Elem[operand3, e, 2*esize]) - SInt(product);
+    else
+        accum = SInt(Elem[operand3, e, 2*esize]) + SInt(product);
+    (Elem[result, e, 2*esize], sat2) = SignedSatQ(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Doubling Multiply-Subtract Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, doubles the results, and subtracts the final results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SQDMLSL Vd.2D,Vn.2S,Vm.S[0]
+

Argument Preparation

a → Vd.2D 
+b → Vn.2S
+c → Vm.S[0]

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat1) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = SInt(Elem[operand3, e, 2*esize]) - SInt(product);
+    else
+        accum = SInt(Elem[operand3, e, 2*esize]) + SInt(product);
+    (Elem[result, e, 2*esize], sat2) = SignedSatQ(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
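
The doubling and the two saturation points in the Operation above are the interesting part of SQDMLSL. A minimal scalar Rust sketch of one 16-bit lane, with a boolean standing in for FPSR.QC (the names are illustrative, not the intrinsic API):

// Scalar model of one SQDMLSL lane (16-bit inputs, 32-bit accumulator).
fn sqdmlsl_lane_s16(acc: i32, a: i16, b_lane: i16) -> (i32, bool) {
    let sat = |v: i64| -> (i32, bool) {
        if v > i32::MAX as i64 {
            (i32::MAX, true)
        } else if v < i32::MIN as i64 {
            (i32::MIN, true)
        } else {
            (v as i32, false)
        }
    };
    let (product, sat1) = sat(2 * a as i64 * b_lane as i64); // doubled product
    let (result, sat2) = sat(acc as i64 - product as i64);   // subtract, saturate
    (result, sat1 || sat2) // the flag models FPSR.QC
}

fn main() {
    // Only i16::MIN * i16::MIN makes the doubled product overflow 32 bits.
    assert_eq!(sqdmlsl_lane_s16(0, i16::MIN, i16::MIN), (i32::MIN + 1, true));
}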

Description

Signed saturating Doubling Multiply-Subtract Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, doubles the results, and subtracts the final results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SQDMLSL2 Vd.4S,Vn.8H,Vm.H[0]
+

Argument Preparation

a → Vd.4S 
+b → Vn.8H
+c → Vm.H[0]

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat1) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = SInt(Elem[operand3, e, 2*esize]) - SInt(product);
+    else
+        accum = SInt(Elem[operand3, e, 2*esize]) + SInt(product);
+    (Elem[result, e, 2*esize], sat2) = SignedSatQ(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Doubling Multiply-Subtract Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, doubles the results, and subtracts the final results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

SQDMLSL2 Vd.2D,Vn.4S,Vm.S[0]
+

Argument Preparation

a → Vd.2D 
+b → Vn.4S
+c → Vm.S[0]

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) operand3 = V[d];
+bits(2*datasize) result;
+integer element1;
+integer element2;
+bits(2*esize) product;
+integer accum;
+boolean sat1;
+boolean sat2;
+
+for e = 0 to elements-1
+    element1 = SInt(Elem[operand1, e, esize]);
+    element2 = SInt(Elem[operand2, e, esize]);
+    (product, sat1) = SignedSatQ(2 * element1 * element2, 2 * esize);
+    if sub_op then
+        accum = SInt(Elem[operand3, e, 2*esize]) - SInt(product);
+    else
+        accum = SInt(Elem[operand3, e, 2*esize]) + SInt(product);
+    (Elem[result, e, 2*esize], sat2) = SignedSatQ(accum, 2 * esize);
+    if sat1 || sat2 then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64
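
SQDMLSL2 repeats the same saturating arithmetic on the upper half of the sources. A compact hedged sketch of one lane, assuming the upper-half element has already been selected:

// Scalar model of one SQDMLSL2 lane; `a_upper` is a lane from the upper half.
fn sqdmlsl2_lane_s16(acc: i32, a_upper: i16, b_lane: i16) -> i32 {
    let clamp = |v: i64| v.clamp(i32::MIN as i64, i32::MAX as i64) as i32;
    let product = clamp(2 * a_upper as i64 * b_lane as i64);
    clamp(acc as i64 - product as i64)
}

fn main() {
    assert_eq!(sqdmlsl2_lane_s16(10, 3, 2), -2); // 10 - (2 * 3 * 2)
}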

Description

Absolute value (vector). This instruction calculates the absolute value of each vector element in the source SIMD&FP register, puts the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

ABS Vd.8B,Vn.8B
+

Argument Preparation

a → Vn.8B 

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = Abs(element);
+    Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Absolute value (vector). This instruction calculates the absolute value of each vector element in the source SIMD&FP register, puts the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

ABS Vd.16B,Vn.16B
+

Argument Preparation

a → Vn.16B 

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = Abs(element);
+    Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Absolute value (vector). This instruction calculates the absolute value of each vector element in the source SIMD&FP register, puts the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

ABS Vd.4H,Vn.4H
+

Argument Preparation

a → Vn.4H 

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = Abs(element);
+    Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Absolute value (vector). This instruction calculates the absolute value of each vector element in the source SIMD&FP register, puts the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

ABS Vd.8H,Vn.8H
+

Argument Preparation

a → Vn.8H 

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = Abs(element);
+    Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Absolute value (vector). This instruction calculates the absolute value of each vector element in the source SIMD&FP register, puts the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

ABS Vd.2S,Vn.2S
+

Argument Preparation

a → Vn.2S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = Abs(element);
+    Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Absolute value (vector). This instruction calculates the absolute value of each vector element in the source SIMD&FP register, puts the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

ABS Vd.4S,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = Abs(element);
+    Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
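
A point worth noting in the ABS pseudocode is that the result is truncated to the element size rather than saturated, so the most negative value maps to itself. A one-lane Rust sketch (illustrative, not the intrinsic itself):

// Scalar model of ABS on one 8-bit lane: plain two's-complement absolute
// value, so i8::MIN wraps back to i8::MIN (no saturation here).
fn abs_lane_s8(x: i8) -> i8 {
    x.wrapping_abs()
}

fn main() {
    assert_eq!(abs_lane_s8(-5), 5);
    assert_eq!(abs_lane_s8(i8::MIN), i8::MIN); // 0x80 truncates back to itself
}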

Description

Floating-point Absolute value (vector). This instruction calculates the absolute value of each vector element in the source SIMD&FP register, writes the result to a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FABS Vd.2S,Vn.2S
+

Argument Preparation

a → Vn.2S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    if neg then
+        element = FPNeg(element);
+    else
+        element = FPAbs(element);
+    Elem[result, e, esize] = element;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Floating-point Absolute value (vector). This instruction calculates the absolute value of each vector element in the source SIMD&FP register, writes the result to a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FABS Vd.4S,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    if neg then
+        element = FPNeg(element);
+    else
+        element = FPAbs(element);
+    Elem[result, e, esize] = element;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
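
FPAbs simply clears the sign bit, including for NaNs and signed zeros. A minimal single-lane Rust sketch of that behaviour (the name is illustrative):

// Scalar model of FABS on one single-precision lane: clear the sign bit.
fn fabs_lane_f32(x: f32) -> f32 {
    f32::from_bits(x.to_bits() & 0x7fff_ffff)
}

fn main() {
    assert_eq!(fabs_lane_f32(-2.5), 2.5);
    assert!(fabs_lane_f32(-0.0).is_sign_positive());
}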

Description

Absolute value (vector). This instruction calculates the absolute value of each vector element in the source SIMD&FP register, puts the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

ABS Dd,Dn
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = Abs(element);
+    Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Absolute value (vector). This instruction calculates the absolute value of each vector element in the source SIMD&FP register, puts the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

ABS Dd,Dn
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = Abs(element);
+    Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Absolute value (vector). This instruction calculates the absolute value of each vector element in the source SIMD&FP register, puts the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

ABS Vd.2D,Vn.2D
+

Argument Preparation

a → Vn.2D 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = Abs(element);
+    Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Absolute value (vector). This instruction calculates the absolute value of each vector element in the source SIMD&FP register, writes the result to a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FABS Dd,Dn
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    if neg then
+        element = FPNeg(element);
+    else
+        element = FPAbs(element);
+    Elem[result, e, esize] = element;
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Absolute value (vector). This instruction calculates the absolute value of each vector element in the source SIMD&FP register, writes the result to a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FABS Vd.2D,Vn.2D
+

Argument Preparation

a → Vn.2D 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    if neg then
+        element = FPNeg(element);
+    else
+        element = FPAbs(element);
+    Elem[result, e, esize] = element;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Absolute value. This instruction reads each vector element from the source SIMD&FP register, puts the absolute value of the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values.

+

A64 Instruction

SQABS Vd.8B,Vn.8B
+

Argument Preparation

a → Vn.8B 

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = Abs(element);
+    (Elem[result, e, esize], sat) = SignedSatQ(element, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Absolute value. This instruction reads each vector element from the source SIMD&FP register, puts the absolute value of the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values.

+

A64 Instruction

SQABS Vd.16B,Vn.16B
+

Argument Preparation

a → Vn.16B 

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = Abs(element);
+    (Elem[result, e, esize], sat) = SignedSatQ(element, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Absolute value. This instruction reads each vector element from the source SIMD&FP register, puts the absolute value of the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values.

+

A64 Instruction

SQABS Vd.4H,Vn.4H
+

Argument Preparation

a → Vn.4H 

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = Abs(element);
+    (Elem[result, e, esize], sat) = SignedSatQ(element, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Absolute value. This instruction reads each vector element from the source SIMD&FP register, puts the absolute value of the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values.

+

A64 Instruction

SQABS Vd.8H,Vn.8H
+

Argument Preparation

a → Vn.8H 

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = Abs(element);
+    (Elem[result, e, esize], sat) = SignedSatQ(element, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Absolute value. This instruction reads each vector element from the source SIMD&FP register, puts the absolute value of the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values.

+

A64 Instruction

SQABS Vd.2S,Vn.2S
+

Argument Preparation

a → Vn.2S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = Abs(element);
+    (Elem[result, e, esize], sat) = SignedSatQ(element, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Absolute value. This instruction reads each vector element from the source SIMD&FP register, puts the absolute value of the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values.

+

A64 Instruction

SQABS Vd.4S,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = Abs(element);
+    (Elem[result, e, esize], sat) = SignedSatQ(element, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Absolute value. This instruction reads each vector element from the source SIMD&FP register, puts the absolute value of the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values.

+

A64 Instruction

SQABS Dd,Dn
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = Abs(element);
+    (Elem[result, e, esize], sat) = SignedSatQ(element, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Absolute value. This instruction reads each vector element from the source SIMD&FP register, puts the absolute value of the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values.

+

A64 Instruction

SQABS Vd.2D,Vn.2D
+

Argument Preparation

a → Vn.2D 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = Abs(element);
+    (Elem[result, e, esize], sat) = SignedSatQ(element, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Absolute value. This instruction reads each vector element from the source SIMD&FP register, puts the absolute value of the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values.

+

A64 Instruction

SQABS Bd,Bn
+

Argument Preparation

a → Bn 

Results

Bd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = Abs(element);
+    (Elem[result, e, esize], sat) = SignedSatQ(element, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Absolute value. This instruction reads each vector element from the source SIMD&FP register, puts the absolute value of the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values.

+

A64 Instruction

SQABS Hd,Hn
+

Argument Preparation

a → Hn 

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = Abs(element);
+    (Elem[result, e, esize], sat) = SignedSatQ(element, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Absolute value. This instruction reads each vector element from the source SIMD&FP register, puts the absolute value of the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values.

+

A64 Instruction

SQABS Sd,Sn
+

Argument Preparation

a → Sn 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = Abs(element);
+    (Elem[result, e, esize], sat) = SignedSatQ(element, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Absolute value. This instruction reads each vector element from the source SIMD&FP register, puts the absolute value of the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values.

+

A64 Instruction

SQABS Dd,Dn
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = Abs(element);
+    (Elem[result, e, esize], sat) = SignedSatQ(element, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64
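
Unlike plain ABS, the SQABS pseudocode saturates the result and sets FPSR.QC when it does. A hedged one-lane Rust sketch, with a boolean standing in for the QC flag:

// Scalar model of SQABS on one 8-bit lane: like ABS, but the most negative
// value saturates to the most positive one instead of wrapping.
fn sqabs_lane_s8(x: i8) -> (i8, bool) {
    match x.checked_abs() {
        Some(v) => (v, false),
        None => (i8::MAX, true), // only i8::MIN saturates and would set FPSR.QC
    }
}

fn main() {
    assert_eq!(sqabs_lane_s8(-5), (5, false));
    assert_eq!(sqabs_lane_s8(i8::MIN), (i8::MAX, true));
}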

Description

Negate (vector). This instruction reads each vector element from the source SIMD&FP register, negates each value, puts the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

NEG Vd.8B,Vn.8B
+

Argument Preparation

a → Vn.8B 

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = Abs(element);
+    Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Negate (vector). This instruction reads each vector element from the source SIMD&FP register, negates each value, puts the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

NEG Vd.16B,Vn.16B
+

Argument Preparation

a → Vn.16B 

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = Abs(element);
+    Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Negate (vector). This instruction reads each vector element from the source SIMD&FP register, negates each value, puts the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

NEG Vd.4H,Vn.4H
+

Argument Preparation

a → Vn.4H 

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = Abs(element);
+    Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Negate (vector). This instruction reads each vector element from the source SIMD&FP register, negates each value, puts the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

NEG Vd.8H,Vn.8H
+

Argument Preparation

a → Vn.8H 

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = Abs(element);
+    Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Negate (vector). This instruction reads each vector element from the source SIMD&FP register, negates each value, puts the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

NEG Vd.2S,Vn.2S
+

Argument Preparation

a → Vn.2S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = Abs(element);
+    Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Negate (vector). This instruction reads each vector element from the source SIMD&FP register, negates each value, puts the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

NEG Vd.4S,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = Abs(element);
+    Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
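
NEG shares its pseudocode with ABS, taking the `neg` branch, and likewise truncates rather than saturates. A one-lane Rust sketch (illustrative only):

// Scalar model of NEG on one 8-bit lane: two's-complement negation, so the
// most negative value wraps back to itself.
fn neg_lane_s8(x: i8) -> i8 {
    x.wrapping_neg()
}

fn main() {
    assert_eq!(neg_lane_s8(5), -5);
    assert_eq!(neg_lane_s8(i8::MIN), i8::MIN); // wraps, no saturation
}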

Description

Floating-point Negate (vector). This instruction negates the value of each vector element in the source SIMD&FP register, writes the result to a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FNEG Vd.2S,Vn.2S
+

Argument Preparation

a → Vn.2S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    if neg then
+        element = FPNeg(element);
+    else
+        element = FPAbs(element);
+    Elem[result, e, esize] = element;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Floating-point Negate (vector). This instruction negates the value of each vector element in the source SIMD&FP register, writes the result to a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FNEG Vd.4S,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    if neg then
+        element = FPNeg(element);
+    else
+        element = FPAbs(element);
+    Elem[result, e, esize] = element;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
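
FPNeg flips only the sign bit, so NaN payloads and zeros keep their magnitude. A minimal single-lane Rust sketch of that bit manipulation (the name is illustrative):

// Scalar model of FNEG on one single-precision lane: toggle the sign bit.
fn fneg_lane_f32(x: f32) -> f32 {
    f32::from_bits(x.to_bits() ^ 0x8000_0000)
}

fn main() {
    assert_eq!(fneg_lane_f32(2.5), -2.5);
    assert!(fneg_lane_f32(0.0).is_sign_negative());
}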

Description

Negate (vector). This instruction reads each vector element from the source SIMD&FP register, negates each value, puts the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

NEG Dd,Dn
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = Abs(element);
+    Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Negate (vector). This instruction reads each vector element from the source SIMD&FP register, negates each value, puts the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

NEG Dd,Dn
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = Abs(element);
+    Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Negate (vector). This instruction reads each vector element from the source SIMD&FP register, negates each value, puts the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

NEG Vd.2D,Vn.2D
+

Argument Preparation

a → Vn.2D 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = Abs(element);
+    Elem[result, e, esize] = element<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Negate (vector). This instruction negates the value of each vector element in the source SIMD&FP register, writes the result to a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FNEG Dd,Dn
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    if neg then
+        element = FPNeg(element);
+    else
+        element = FPAbs(element);
+    Elem[result, e, esize] = element;
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Negate (vector). This instruction negates the value of each vector element in the source SIMD&FP register, writes the result to a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FNEG Vd.2D,Vn.2D
+

Argument Preparation

a → Vn.2D 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    if neg then
+        element = FPNeg(element);
+    else
+        element = FPAbs(element);
+    Elem[result, e, esize] = element;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Negate. This instruction reads each vector element from the source SIMD&FP register, negates each value, places the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values.

+

A64 Instruction

SQNEG Vd.8B,Vn.8B
+

Argument Preparation

a → Vn.8B 

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = Abs(element);
+    (Elem[result, e, esize], sat) = SignedSatQ(element, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Negate. This instruction reads each vector element from the source SIMD&FP register, negates each value, places the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values.

+

A64 Instruction

SQNEG Vd.16B,Vn.16B
+

Argument Preparation

a → Vn.16B 

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = Abs(element);
+    (Elem[result, e, esize], sat) = SignedSatQ(element, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Negate. This instruction reads each vector element from the source SIMD&FP register, negates each value, places the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values.

+

A64 Instruction

SQNEG Vd.4H,Vn.4H
+

Argument Preparation

a → Vn.4H 

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = Abs(element);
+    (Elem[result, e, esize], sat) = SignedSatQ(element, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Negate. This instruction reads each vector element from the source SIMD&FP register, negates each value, places the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values.

+

A64 Instruction

SQNEG Vd.8H,Vn.8H
+

Argument Preparation

a → Vn.8H 

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = Abs(element);
+    (Elem[result, e, esize], sat) = SignedSatQ(element, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Negate. This instruction reads each vector element from the source SIMD&FP register, negates each value, places the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values.

+

A64 Instruction

SQNEG Vd.2S,Vn.2S
+

Argument Preparation

a → Vn.2S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = Abs(element);
+    (Elem[result, e, esize], sat) = SignedSatQ(element, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Negate. This instruction reads each vector element from the source SIMD&FP register, negates each value, places the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values.

+

A64 Instruction

SQNEG Vd.4S,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = Abs(element);
+    (Elem[result, e, esize], sat) = SignedSatQ(element, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed saturating Negate. This instruction reads each vector element from the source SIMD&FP register, negates each value, places the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values.

+

A64 Instruction

SQNEG Dd,Dn
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = Abs(element);
+    (Elem[result, e, esize], sat) = SignedSatQ(element, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Negate. This instruction reads each vector element from the source SIMD&FP register, negates each value, places the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values.

+

A64 Instruction

SQNEG Vd.2D,Vn.2D
+

Argument Preparation

a → Vn.2D 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = Abs(element);
+    (Elem[result, e, esize], sat) = SignedSatQ(element, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Negate. This instruction reads each vector element from the source SIMD&FP register, negates each value, places the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values.

+

A64 Instruction

SQNEG Bd,Bn
+

Argument Preparation

a → Bn 

Results

Bd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = Abs(element);
+    (Elem[result, e, esize], sat) = SignedSatQ(element, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Negate. This instruction reads each vector element from the source SIMD&FP register, negates each value, places the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values.

+

A64 Instruction

SQNEG Hd,Hn
+

Argument Preparation

a → Hn 

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = Abs(element);
+    (Elem[result, e, esize], sat) = SignedSatQ(element, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Negate. This instruction reads each vector element from the source SIMD&FP register, negates each value, places the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values.

+

A64 Instruction

SQNEG Sd,Sn
+

Argument Preparation

a → Sn 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = Abs(element);
+    (Elem[result, e, esize], sat) = SignedSatQ(element, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed saturating Negate. This instruction reads each vector element from the source SIMD&FP register, negates each value, places the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values.

+

A64 Instruction

SQNEG Dd,Dn
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element;
+boolean sat;
+
+for e = 0 to elements-1
+    element = SInt(Elem[operand, e, esize]);
+    if neg then
+        element = -element;
+    else
+        element = Abs(element);
+    (Elem[result, e, esize], sat) = SignedSatQ(element, esize);
+    if sat then FPSR.QC = '1';
+
+V[d] = result;
+

Supported architectures

A64
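
As with SQABS, the distinguishing detail of SQNEG is the saturating treatment of the most negative input and the QC side effect. A hedged one-lane Rust sketch:

// Scalar model of SQNEG on one 8-bit lane: negation that saturates i8::MIN
// to i8::MAX instead of wrapping.
fn sqneg_lane_s8(x: i8) -> (i8, bool) {
    match x.checked_neg() {
        Some(v) => (v, false),
        None => (i8::MAX, true), // only i8::MIN saturates and would set FPSR.QC
    }
}

fn main() {
    assert_eq!(sqneg_lane_s8(5), (-5, false));
    assert_eq!(sqneg_lane_s8(i8::MIN), (i8::MAX, true));
}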

Description

Count Leading Sign bits (vector). This instruction counts the number of consecutive bits following the most significant bit that are the same as the most significant bit in each vector element in the source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register. The count does not include the most significant bit itself.

+

A64 Instruction

CLS Vd.8B,Vn.8B
+

Argument Preparation

a → Vn.8B 

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+integer count;
+for e = 0 to elements-1
+    if countop == CountOp_CLS then
+        count = CountLeadingSignBits(Elem[operand, e, esize]);
+    else
+        count = CountLeadingZeroBits(Elem[operand, e, esize]);
+    Elem[result, e, esize] = count<esize-1:0>;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Count Leading Sign bits (vector). This instruction counts the number of consecutive bits following the most significant bit that are the same as the most significant bit in each vector element in the source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register. The count does not include the most significant bit itself.

+

A64 Instruction

CLS Vd.16B,Vn.16B
+

Argument Preparation

a → Vn.16B 

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+integer count;
+for e = 0 to elements-1
+    if countop == CountOp_CLS then
+        count = CountLeadingSignBits(Elem[operand, e, esize]);
+    else
+        count = CountLeadingZeroBits(Elem[operand, e, esize]);
+    Elem[result, e, esize] = count<esize-1:0>;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Count Leading Sign bits (vector). This instruction counts the number of consecutive bits following the most significant bit that are the same as the most significant bit in each vector element in the source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register. The count does not include the most significant bit itself.

+

A64 Instruction

CLS Vd.4H,Vn.4H
+

Argument Preparation

a → Vn.4H 

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+integer count;
+for e = 0 to elements-1
+    if countop == CountOp_CLS then
+        count = CountLeadingSignBits(Elem[operand, e, esize]);
+    else
+        count = CountLeadingZeroBits(Elem[operand, e, esize]);
+    Elem[result, e, esize] = count<esize-1:0>;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Count Leading Sign bits (vector). This instruction counts the number of consecutive bits following the most significant bit that are the same as the most significant bit in each vector element in the source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register. The count does not include the most significant bit itself.

+

A64 Instruction

CLS Vd.8H,Vn.8H
+

Argument Preparation

a → Vn.8H 

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+integer count;
+for e = 0 to elements-1
+    if countop == CountOp_CLS then
+        count = CountLeadingSignBits(Elem[operand, e, esize]);
+    else
+        count = CountLeadingZeroBits(Elem[operand, e, esize]);
+    Elem[result, e, esize] = count<esize-1:0>;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Count Leading Sign bits (vector). This instruction counts the number of consecutive bits following the most significant bit that are the same as the most significant bit in each vector element in the source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register. The count does not include the most significant bit itself.

+

A64 Instruction

CLS Vd.2S,Vn.2S
+

Argument Preparation

a → Vn.2S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+integer count;
+for e = 0 to elements-1
+    if countop == CountOp_CLS then
+        count = CountLeadingSignBits(Elem[operand, e, esize]);
+    else
+        count = CountLeadingZeroBits(Elem[operand, e, esize]);
+    Elem[result, e, esize] = count<esize-1:0>;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Count Leading Sign bits (vector). This instruction counts the number of consecutive bits following the most significant bit that are the same as the most significant bit in each vector element in the source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register. The count does not include the most significant bit itself.

+

A64 Instruction

CLS Vd.4S,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+integer count;
+for e = 0 to elements-1
+    if countop == CountOp_CLS then
+        count = CountLeadingSignBits(Elem[operand, e, esize]);
+    else
+        count = CountLeadingZeroBits(Elem[operand, e, esize]);
+    Elem[result, e, esize] = count<esize-1:0>;
+V[d] = result;
+

Supported architectures

v7/A32/A64
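
CountLeadingSignBits counts the run of bits below the sign bit that equal it, excluding the sign bit itself. One way to model a single 8-bit lane in Rust (a sketch, not the intrinsic):

// Scalar model of CLS on one 8-bit lane: XOR with the sign-extended sign bit
// turns "copies of the sign bit" into leading zeros; subtract one so the
// sign bit itself is not counted.
fn cls_lane_s8(x: i8) -> u32 {
    ((x ^ (x >> 7)) as u8).leading_zeros() - 1
}

fn main() {
    assert_eq!(cls_lane_s8(1), 6);
    assert_eq!(cls_lane_s8(-1), 7);
    assert_eq!(cls_lane_s8(0), 7);
}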

Description

Count Leading Zero bits (vector). This instruction counts the number of consecutive zeros, starting from the most significant bit, in each vector element in the source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

CLZ Vd.8B,Vn.8B
+

Argument Preparation

a → Vn.8B 

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+integer count;
+for e = 0 to elements-1
+    if countop == CountOp_CLS then
+        count = CountLeadingSignBits(Elem[operand, e, esize]);
+    else
+        count = CountLeadingZeroBits(Elem[operand, e, esize]);
+    Elem[result, e, esize] = count<esize-1:0>;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Count Leading Zero bits (vector). This instruction counts the number of consecutive zeros, starting from the most significant bit, in each vector element in the source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

CLZ Vd.16B,Vn.16B
+

Argument Preparation

a → Vn.16B 

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+integer count;
+for e = 0 to elements-1
+    if countop == CountOp_CLS then
+        count = CountLeadingSignBits(Elem[operand, e, esize]);
+    else
+        count = CountLeadingZeroBits(Elem[operand, e, esize]);
+    Elem[result, e, esize] = count<esize-1:0>;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Count Leading Zero bits (vector). This instruction counts the number of consecutive zeros, starting from the most significant bit, in each vector element in the source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

CLZ Vd.4H,Vn.4H
+

Argument Preparation

a → Vn.4H 

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+integer count;
+for e = 0 to elements-1
+    if countop == CountOp_CLS then
+        count = CountLeadingSignBits(Elem[operand, e, esize]);
+    else
+        count = CountLeadingZeroBits(Elem[operand, e, esize]);
+    Elem[result, e, esize] = count<esize-1:0>;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Count Leading Zero bits (vector). This instruction counts the number of consecutive zeros, starting from the most significant bit, in each vector element in the source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

CLZ Vd.8H,Vn.8H
+

Argument Preparation

a → Vn.8H 

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+integer count;
+for e = 0 to elements-1
+    if countop == CountOp_CLS then
+        count = CountLeadingSignBits(Elem[operand, e, esize]);
+    else
+        count = CountLeadingZeroBits(Elem[operand, e, esize]);
+    Elem[result, e, esize] = count<esize-1:0>;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Count Leading Zero bits (vector). This instruction counts the number of consecutive zeros, starting from the most significant bit, in each vector element in the source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

CLZ Vd.2S,Vn.2S
+

Argument Preparation

a → Vn.2S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+integer count;
+for e = 0 to elements-1
+    if countop == CountOp_CLS then
+        count = CountLeadingSignBits(Elem[operand, e, esize]);
+    else
+        count = CountLeadingZeroBits(Elem[operand, e, esize]);
+    Elem[result, e, esize] = count<esize-1:0>;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Count Leading Zero bits (vector). This instruction counts the number of consecutive zeros, starting from the most significant bit, in each vector element in the source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

CLZ Vd.4S,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+integer count;
+for e = 0 to elements-1
+    if countop == CountOp_CLS then
+        count = CountLeadingSignBits(Elem[operand, e, esize]);
+    else
+        count = CountLeadingZeroBits(Elem[operand, e, esize]);
+    Elem[result, e, esize] = count<esize-1:0>;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Count Leading Zero bits (vector). This instruction counts the number of consecutive zeros, starting from the most significant bit, in each vector element in the source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

CLZ Vd.8B,Vn.8B
+

Argument Preparation

a → Vn.8B 

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+integer count;
+for e = 0 to elements-1
+    if countop == CountOp_CLS then
+        count = CountLeadingSignBits(Elem[operand, e, esize]);
+    else
+        count = CountLeadingZeroBits(Elem[operand, e, esize]);
+    Elem[result, e, esize] = count<esize-1:0>;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Count Leading Zero bits (vector). This instruction counts the number of consecutive zeros, starting from the most significant bit, in each vector element in the source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

CLZ Vd.16B,Vn.16B
+

Argument Preparation

a → Vn.16B 

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+integer count;
+for e = 0 to elements-1
+    if countop == CountOp_CLS then
+        count = CountLeadingSignBits(Elem[operand, e, esize]);
+    else
+        count = CountLeadingZeroBits(Elem[operand, e, esize]);
+    Elem[result, e, esize] = count<esize-1:0>;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Count Leading Zero bits (vector). This instruction counts the number of consecutive zeros, starting from the most significant bit, in each vector element in the source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

CLZ Vd.4H,Vn.4H
+

Argument Preparation

a → Vn.4H 

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+integer count;
+for e = 0 to elements-1
+    if countop == CountOp_CLS then
+        count = CountLeadingSignBits(Elem[operand, e, esize]);
+    else
+        count = CountLeadingZeroBits(Elem[operand, e, esize]);
+    Elem[result, e, esize] = count<esize-1:0>;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Count Leading Zero bits (vector). This instruction counts the number of consecutive zeros, starting from the most significant bit, in each vector element in the source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

CLZ Vd.8H,Vn.8H
+

Argument Preparation

a → Vn.8H 

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+integer count;
+for e = 0 to elements-1
+    if countop == CountOp_CLS then
+        count = CountLeadingSignBits(Elem[operand, e, esize]);
+    else
+        count = CountLeadingZeroBits(Elem[operand, e, esize]);
+    Elem[result, e, esize] = count<esize-1:0>;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Count Leading Zero bits (vector). This instruction counts the number of consecutive zeros, starting from the most significant bit, in each vector element in the source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

CLZ Vd.2S,Vn.2S
+

Argument Preparation

a → Vn.2S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+integer count;
+for e = 0 to elements-1
+    if countop == CountOp_CLS then
+        count = CountLeadingSignBits(Elem[operand, e, esize]);
+    else
+        count = CountLeadingZeroBits(Elem[operand, e, esize]);
+    Elem[result, e, esize] = count<esize-1:0>;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Count Leading Zero bits (vector). This instruction counts the number of consecutive zeros, starting from the most significant bit, in each vector element in the source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

CLZ Vd.4S,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+integer count;
+for e = 0 to elements-1
+    if countop == CountOp_CLS then
+        count = CountLeadingSignBits(Elem[operand, e, esize]);
+    else
+        count = CountLeadingZeroBits(Elem[operand, e, esize]);
+    Elem[result, e, esize] = count<esize-1:0>;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Population Count per byte. This instruction counts the number of bits that have a value of one in each vector element in the source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

CNT Vd.8B,Vn.8B
+

Argument Preparation

a → Vn.8B 

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+integer count;
+for e = 0 to elements-1
+    count = BitCount(Elem[operand, e, esize]);
+    Elem[result, e, esize] = count<esize-1:0>;
+V[d] = result;
+

Supported architectures

v7/A32/A64
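
A scalar sketch of the per-byte population count, assuming u8 lanes; count_ones plays the role of BitCount in the pseudocode:

    fn main() {
        let lanes: [u8; 8] = [0x00, 0x01, 0x03, 0x0F, 0xFF, 0xAA, 0x55, 0x80];
        let pop: Vec<u32> = lanes.iter().map(|b| b.count_ones()).collect();
        println!("{:?}", pop); // [0, 1, 2, 4, 8, 4, 4, 1]
    }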

Description

Population Count per byte. This instruction counts the number of bits that have a value of one in each vector element in the source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

CNT Vd.16B,Vn.16B
+

Argument Preparation

a → Vn.16B 

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+integer count;
+for e = 0 to elements-1
+    count = BitCount(Elem[operand, e, esize]);
+    Elem[result, e, esize] = count<esize-1:0>;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Population Count per byte. This instruction counts the number of bits that have a value of one in each vector element in the source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

CNT Vd.8B,Vn.8B
+

Argument Preparation

a → Vn.8B 

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+integer count;
+for e = 0 to elements-1
+    count = BitCount(Elem[operand, e, esize]);
+    Elem[result, e, esize] = count<esize-1:0>;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Population Count per byte. This instruction counts the number of bits that have a value of one in each vector element in the source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

CNT Vd.16B,Vn.16B
+

Argument Preparation

a → Vn.16B 

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+integer count;
+for e = 0 to elements-1
+    count = BitCount(Elem[operand, e, esize]);
+    Elem[result, e, esize] = count<esize-1:0>;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Population Count per byte. This instruction counts the number of bits that have a value of one in each vector element in the source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

CNT Vd.8B,Vn.8B
+

Argument Preparation

a → Vn.8B 

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+integer count;
+for e = 0 to elements-1
+    count = BitCount(Elem[operand, e, esize]);
+    Elem[result, e, esize] = count<esize-1:0>;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Population Count per byte. This instruction counts the number of bits that have a value of one in each vector element in the source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

CNT Vd.16B,Vn.16B
+

Argument Preparation

a → Vn.16B 

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+integer count;
+for e = 0 to elements-1
+    count = BitCount(Elem[operand, e, esize]);
+    Elem[result, e, esize] = count<esize-1:0>;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Reciprocal Estimate. This instruction reads each vector element from the source SIMD&FP register, calculates an approximate inverse for the unsigned integer value, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

URECPE Vd.2S,Vn.2S
+

Argument Preparation

a → Vn.2S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(32) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, 32];
+    Elem[result, e, 32] = UnsignedRecipEstimate(element);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Reciprocal Estimate. This instruction reads each vector element from the source SIMD&FP register, calculates an approximate inverse for the unsigned integer value, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

URECPE Vd.4S,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(32) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, 32];
+    Elem[result, e, 32] = UnsignedRecipEstimate(element);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Floating-point Reciprocal Estimate. This instruction finds an approximate reciprocal estimate for each vector element in the source SIMD&FP register, places the result in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FRECPE Vd.2S,Vn.2S
+

Argument Preparation

a → Vn.2S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPRecipEstimate(element, FPCR);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
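
A rough scalar picture of what the lane loop computes; plain division stands in for the table-based FPRecipEstimate, which returns only a low-precision approximation of 1/x. This is a sketch, not the intrinsic:

    fn main() {
        let lanes = [2.0f32, 0.5];
        // Hardware returns an estimate of each of these values.
        let recip: Vec<f32> = lanes.iter().map(|&x| 1.0 / x).collect();
        println!("{:?}", recip); // [0.5, 2.0]
    }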

Description

Floating-point Reciprocal Estimate. This instruction finds an approximate reciprocal estimate for each vector element in the source SIMD&FP register, places the result in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FRECPE Vd.4S,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPRecipEstimate(element, FPCR);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Floating-point Reciprocal Estimate. This instruction finds an approximate reciprocal estimate for each vector element in the source SIMD&FP register, places the result in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FRECPE Dd,Dn
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPRecipEstimate(element, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Reciprocal Estimate. This instruction finds an approximate reciprocal estimate for each vector element in the source SIMD&FP register, places the result in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FRECPE Vd.2D,Vn.2D
+

Argument Preparation

a → Vn.2D 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPRecipEstimate(element, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Reciprocal Estimate. This instruction finds an approximate reciprocal estimate for each vector element in the source SIMD&FP register, places the result in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FRECPE Sd,Sn
+

Argument Preparation

a → Sn 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPRecipEstimate(element, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Reciprocal Estimate. This instruction finds an approximate reciprocal estimate for each vector element in the source SIMD&FP register, places the result in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FRECPE Dd,Dn
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPRecipEstimate(element, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Reciprocal Step. This instruction multiplies the corresponding floating-point values in the vectors of the two source SIMD&FP registers, subtracts each of the products from 2.0, places the resulting floating-point values in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FRECPS Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    Elem[result, e, esize] = FPRecipStepFused(element1, element2);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
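
FPRecipStepFused(a, b) evaluates 2.0 - a*b as a fused operation. Paired with an FRECPE estimate it forms one Newton-Raphson refinement of a reciprocal, r' = r * (2 - x*r). A scalar, unfused sketch under that reading:

    // Unfused stand-in for FPRecipStepFused.
    fn recip_step(a: f32, b: f32) -> f32 {
        2.0 - a * b
    }

    fn main() {
        let x = 3.0f32;
        let mut r = 0.3f32;        // coarse starting estimate of 1/3 (the role FRECPE plays)
        r *= recip_step(x, r);     // one Newton-Raphson refinement
        r *= recip_step(x, r);     // a second refinement
        println!("{}", r);         // ~0.33333
    }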

Description

Floating-point Reciprocal Step. This instruction multiplies the corresponding floating-point values in the vectors of the two source SIMD&FP registers, subtracts each of the products from 2.0, places the resulting floating-point values in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FRECPS Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    Elem[result, e, esize] = FPRecipStepFused(element1, element2);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Floating-point Reciprocal Step. This instruction multiplies the corresponding floating-point values in the vectors of the two source SIMD&FP registers, subtracts each of the products from 2.0, places the resulting floating-point values in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FRECPS Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    Elem[result, e, esize] = FPRecipStepFused(element1, element2);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Reciprocal Step. This instruction multiplies the corresponding floating-point values in the vectors of the two source SIMD&FP registers, subtracts each of the products from 2.0, places the resulting floating-point values in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FRECPS Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    Elem[result, e, esize] = FPRecipStepFused(element1, element2);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Reciprocal Step. This instruction multiplies the corresponding floating-point values in the vectors of the two source SIMD&FP registers, subtracts each of the products from 2.0, places the resulting floating-point values in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FRECPS Sd,Sn,Sm
+

Argument Preparation

a → Sn 
+b → Sm

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    Elem[result, e, esize] = FPRecipStepFused(element1, element2);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Reciprocal Step. This instruction multiplies the corresponding floating-point values in the vectors of the two source SIMD&FP registers, subtracts each of the products from 2.0, places the resulting floating-point values in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FRECPS Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    Elem[result, e, esize] = FPRecipStepFused(element1, element2);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Square Root (vector). This instruction calculates the square root for each vector element in the source SIMD&FP register, places the result in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FSQRT Vd.2S,Vn.2S
+

Argument Preparation

a → Vn.2S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPSqrt(element, FPCR);
+
+V[d] = result;
+

Supported architectures

A64
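
Unlike the estimate instructions, FSQRT computes the square root itself for each lane; a scalar sketch with f32::sqrt:

    fn main() {
        let lanes = [4.0f32, 2.0, 0.25, 9.0];
        let roots: Vec<f32> = lanes.iter().map(|x| x.sqrt()).collect();
        println!("{:?}", roots); // [2.0, 1.4142135, 0.5, 3.0]
    }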

Description

Floating-point Square Root (vector). This instruction calculates the square root for each vector element in the source SIMD&FP register, places the result in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FSQRT Vd.4S,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPSqrt(element, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Square Root (vector). This instruction calculates the square root for each vector element in the source SIMD&FP register, places the result in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FSQRT Dd,Dn
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPSqrt(element, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Square Root (vector). This instruction calculates the square root for each vector element in the source SIMD&FP register, places the result in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FSQRT Vd.2D,Vn.2D
+

Argument Preparation

a → Vn.2D 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPSqrt(element, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Reciprocal Square Root Estimate. This instruction reads each vector element from the source SIMD&FP register, calculates an approximate inverse square root for each value, places the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are unsigned integer values.

+

A64 Instruction

URSQRTE Vd.2S,Vn.2S
+

Argument Preparation

a → Vn.2S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(32) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, 32];
+    Elem[result, e, 32] = UnsignedRSqrtEstimate(element);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Reciprocal Square Root Estimate. This instruction reads each vector element from the source SIMD&FP register, calculates an approximate inverse square root for each value, places the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are unsigned integer values.

+

A64 Instruction

URSQRTE Vd.4S,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(32) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, 32];
+    Elem[result, e, 32] = UnsignedRSqrtEstimate(element);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Floating-point Reciprocal Square Root Estimate. This instruction calculates an approximate reciprocal square root for each vector element in the source SIMD&FP register, places the result in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FRSQRTE Vd.2S,Vn.2S
+

Argument Preparation

a → Vn.2S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPRSqrtEstimate(element, FPCR);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
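
A scalar sketch of the per-lane result, reading FPRSqrtEstimate as an approximation of 1/sqrt(x); exact arithmetic stands in for the low-precision hardware estimate:

    fn main() {
        let lanes = [4.0f32, 2.0];
        // Hardware returns an estimate of each of these values.
        let est: Vec<f32> = lanes.iter().map(|&x| 1.0 / x.sqrt()).collect();
        println!("{:?}", est); // [0.5, 0.70710677]
    }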

Description

Floating-point Reciprocal Square Root Estimate. This instruction calculates an approximate reciprocal square root for each vector element in the source SIMD&FP register, places the result in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FRSQRTE Vd.4S,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPRSqrtEstimate(element, FPCR);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Floating-point Reciprocal Square Root Estimate. This instruction calculates an approximate reciprocal square root for each vector element in the source SIMD&FP register, places the result in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FRSQRTE Dd,Dn
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPRSqrtEstimate(element, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Reciprocal Square Root Estimate. This instruction calculates an approximate reciprocal square root for each vector element in the source SIMD&FP register, places the result in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FRSQRTE Vd.2D,Vn.2D
+

Argument Preparation

a → Vn.2D 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPRSqrtEstimate(element, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Reciprocal Square Root Estimate. This instruction calculates an approximate reciprocal square root for each vector element in the source SIMD&FP register, places the result in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FRSQRTE Sd,Sn
+

Argument Preparation

a → Sn 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPRSqrtEstimate(element, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Reciprocal Square Root Estimate. This instruction calculates an approximate reciprocal square root for each vector element in the source SIMD&FP register, places the result in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FRSQRTE Dd,Dn
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPRSqrtEstimate(element, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Reciprocal Square Root Step. This instruction multiplies corresponding floating-point values in the vectors of the two source SIMD&FP registers, subtracts each of the products from 3.0, divides these results by 2.0, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FRSQRTS Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    Elem[result, e, esize] = FPRSqrtStepFused(element1, element2);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
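
FPRSqrtStepFused(a, b) evaluates (3.0 - a*b) / 2.0 as a fused operation. Combined with an FRSQRTE estimate it gives one Newton-Raphson refinement of 1/sqrt(x), r' = r * (3 - x*r*r) / 2. A scalar, unfused sketch:

    // Unfused stand-in for FPRSqrtStepFused.
    fn rsqrt_step(a: f32, b: f32) -> f32 {
        (3.0 - a * b) / 2.0
    }

    fn main() {
        let x = 2.0f32;
        let mut r = 0.7f32;              // rough starting estimate of 1/sqrt(2) (the role FRSQRTE plays)
        r *= rsqrt_step(x, r * r);       // one Newton-Raphson refinement
        r *= rsqrt_step(x, r * r);       // a second refinement
        println!("{}", r);               // ~0.7071
    }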

Description

Floating-point Reciprocal Square Root Step. This instruction multiplies corresponding floating-point values in the vectors of the two source SIMD&FP registers, subtracts each of the products from 3.0, divides these results by 2.0, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FRSQRTS Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    Elem[result, e, esize] = FPRSqrtStepFused(element1, element2);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Floating-point Reciprocal Square Root Step. This instruction multiplies corresponding floating-point values in the vectors of the two source SIMD&FP registers, subtracts each of the products from 3.0, divides these results by 2.0, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FRSQRTS Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    Elem[result, e, esize] = FPRSqrtStepFused(element1, element2);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Reciprocal Square Root Step. This instruction multiplies corresponding floating-point values in the vectors of the two source SIMD&FP registers, subtracts each of the products from 3.0, divides these results by 2.0, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FRSQRTS Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    Elem[result, e, esize] = FPRSqrtStepFused(element1, element2);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Reciprocal Square Root Step. This instruction multiplies corresponding floating-point values in the vectors of the two source SIMD&FP registers, subtracts each of the products from 3.0, divides these results by 2.0, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FRSQRTS Sd,Sn,Sm
+

Argument Preparation

a → Sn 
+b → Sm

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    Elem[result, e, esize] = FPRSqrtStepFused(element1, element2);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Reciprocal Square Root Step. This instruction multiplies corresponding floating-point values in the vectors of the two source SIMD&FP registers, subtracts each of the products from 3.0, divides these results by 2.0, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FRSQRTS Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    Elem[result, e, esize] = FPRSqrtStepFused(element1, element2);
+
+V[d] = result;
+

Supported architectures

A64

Description

Bitwise NOT (vector). This instruction reads each vector element from the source SIMD&FP register, places the inverse of each value into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

MVN Vd.8B,Vn.8B
+

Argument Preparation

a → Vn.8B 

Results

Vd.8B → result
+

Operation

The description of NOT gives the operational pseudocode for this instruction.

Supported architectures

v7/A32/A64
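
The referenced NOT pseudocode amounts to a bitwise complement of every lane; a scalar sketch over u8 lanes, illustrative only:

    fn main() {
        let lanes: [u8; 8] = [0x00, 0xFF, 0x0F, 0xF0, 0xAA, 0x55, 0x01, 0x80];
        let inv: Vec<u8> = lanes.iter().map(|&b| !b).collect();
        println!("{:X?}", inv); // [FF, 0, F0, F, 55, AA, FE, 7F]
    }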

Description

Bitwise NOT (vector). This instruction reads each vector element from the source SIMD&FP register, places the inverse of each value into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

MVN Vd.16B,Vn.16B
+

Argument Preparation

a → Vn.16B 

Results

Vd.16B → result
+

Operation

The description of NOT gives the operational pseudocode for this instruction.

Supported architectures

v7/A32/A64

Description

Bitwise NOT (vector). This instruction reads each vector element from the source SIMD&FP register, places the inverse of each value into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

MVN Vd.8B,Vn.8B
+

Argument Preparation

a → Vn.8B 

Results

Vd.8B → result
+

Operation

The description of NOT gives the operational pseudocode for this instruction.

Supported architectures

v7/A32/A64

Description

Bitwise NOT (vector). This instruction reads each vector element from the source SIMD&FP register, places the inverse of each value into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

MVN Vd.16B,Vn.16B
+

Argument Preparation

a → Vn.16B 

Results

Vd.16B → result
+

Operation

The description of NOT gives the operational pseudocode for this instruction.

Supported architectures

v7/A32/A64

Description

Bitwise NOT (vector). This instruction reads each vector element from the source SIMD&FP register, places the inverse of each value into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

MVN Vd.8B,Vn.8B
+

Argument Preparation

a → Vn.8B 

Results

Vd.8B → result
+

Operation

The description of NOT gives the operational pseudocode for this instruction.

Supported architectures

v7/A32/A64

Description

Bitwise NOT (vector). This instruction reads each vector element from the source SIMD&FP register, places the inverse of each value into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

MVN Vd.16B,Vn.16B
+

Argument Preparation

a → Vn.16B 

Results

Vd.16B → result
+

Operation

The description of NOT gives the operational pseudocode for this instruction.

Supported architectures

v7/A32/A64

Description

Bitwise NOT (vector). This instruction reads each vector element from the source SIMD&FP register, places the inverse of each value into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

MVN Vd.8B,Vn.8B
+

Argument Preparation

a → Vn.8B 

Results

Vd.8B → result
+

Operation

The description of NOT gives the operational pseudocode for this instruction.

Supported architectures

v7/A32/A64

Description

Bitwise NOT (vector). This instruction reads each vector element from the source SIMD&FP register, places the inverse of each value into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

MVN Vd.16B,Vn.16B
+

Argument Preparation

a → Vn.16B 

Results

Vd.16B → result
+

Operation

The description of NOT gives the operational pseudocode for this instruction.

Supported architectures

v7/A32/A64

Description

Bitwise NOT (vector). This instruction reads each vector element from the source SIMD&FP register, places the inverse of each value into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

MVN Vd.8B,Vn.8B
+

Argument Preparation

a → Vn.8B 

Results

Vd.8B → result
+

Operation

The description of NOT gives the operational pseudocode for this instruction.

Supported architectures

v7/A32/A64

Description

Bitwise NOT (vector). This instruction reads each vector element from the source SIMD&FP register, places the inverse of each value into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

MVN Vd.16B,Vn.16B
+

Argument Preparation

a → Vn.16B 

Results

Vd.16B → result
+

Operation

The description of NOT gives the operational pseudocode for this instruction.

Supported architectures

v7/A32/A64

Description

Bitwise NOT (vector). This instruction reads each vector element from the source SIMD&FP register, places the inverse of each value into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

MVN Vd.8B,Vn.8B
+

Argument Preparation

a → Vn.8B 

Results

Vd.8B → result
+

Operation

The description of NOT gives the operational pseudocode for this instruction.

Supported architectures

v7/A32/A64

Description

Bitwise NOT (vector). This instruction reads each vector element from the source SIMD&FP register, places the inverse of each value into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

MVN Vd.16B,Vn.16B
+

Argument Preparation

a → Vn.16B 

Results

Vd.16B → result
+

Operation

The description of NOT gives the operational pseudocode for this instruction.

Supported architectures

v7/A32/A64

Description

Bitwise NOT (vector). This instruction reads each vector element from the source SIMD&FP register, places the inverse of each value into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

MVN Vd.8B,Vn.8B
+

Argument Preparation

a → Vn.8B 

Results

Vd.8B → result
+

Operation

The description of NOT gives the operational pseudocode for this instruction.

Supported architectures

v7/A32/A64

Description

Bitwise NOT (vector). This instruction reads each vector element from the source SIMD&FP register, places the inverse of each value into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

MVN Vd.16B,Vn.16B
+

Argument Preparation

a → Vn.16B 

Results

Vd.16B → result
+

Operation

The description of NOT gives the operational pseudocode for this instruction.

Supported architectures

v7/A32/A64

Description

Bitwise AND (vector). This instruction performs a bitwise AND between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

AND Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+result = operand1 AND operand2;
+V[d] = result;
+

Supported architectures

v7/A32/A64
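
A scalar sketch of the whole-register bitwise AND, modelled here as u8 lanes (the arrangement only fixes the register width, the operation is bit-for-bit):

    fn main() {
        let a: [u8; 8] = [0xFF, 0x0F, 0xAA, 0x55, 0xF0, 0x81, 0x3C, 0x00];
        let b: [u8; 8] = [0x0F, 0xFF, 0x0F, 0xFF, 0x33, 0x7E, 0xFF, 0xFF];
        let and: Vec<u8> = a.iter().zip(b.iter()).map(|(x, y)| x & y).collect();
        println!("{:X?}", and); // [F, F, A, 55, 30, 0, 3C, 0]
    }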

Description

Bitwise AND (vector). This instruction performs a bitwise AND between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

AND Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+result = operand1 AND operand2;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise AND (vector). This instruction performs a bitwise AND between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

AND Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+result = operand1 AND operand2;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise AND (vector). This instruction performs a bitwise AND between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

AND Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+result = operand1 AND operand2;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise AND (vector). This instruction performs a bitwise AND between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

AND Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+result = operand1 AND operand2;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise AND (vector). This instruction performs a bitwise AND between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

AND Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+result = operand1 AND operand2;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise AND (vector). This instruction performs a bitwise AND between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

AND Dd,Dn,Dm
+

Argument Preparation

a → Dn 
+b → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+result = operand1 AND operand2;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise AND (vector). This instruction performs a bitwise AND between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

AND Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+result = operand1 AND operand2;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise AND (vector). This instruction performs a bitwise AND between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

AND Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+result = operand1 AND operand2;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise AND (vector). This instruction performs a bitwise AND between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

AND Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+result = operand1 AND operand2;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise AND (vector). This instruction performs a bitwise AND between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

AND Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+result = operand1 AND operand2;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise AND (vector). This instruction performs a bitwise AND between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

AND Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+result = operand1 AND operand2;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise AND (vector). This instruction performs a bitwise AND between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

AND Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+result = operand1 AND operand2;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise AND (vector). This instruction performs a bitwise AND between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

AND Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+result = operand1 AND operand2;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise AND (vector). This instruction performs a bitwise AND between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

AND Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+result = operand1 AND operand2;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise AND (vector). This instruction performs a bitwise AND between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

AND Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+result = operand1 AND operand2;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise inclusive OR (vector, register). This instruction performs a bitwise OR between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

ORR Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+result = operand1 OR operand2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
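
The same shape for the bitwise OR, again modelled as u8 lanes:

    fn main() {
        let a: [u8; 8] = [0xF0, 0x0F, 0xAA, 0x00, 0x12, 0x80, 0x3C, 0x01];
        let b: [u8; 8] = [0x0F, 0x0F, 0x55, 0x00, 0x34, 0x7F, 0xC3, 0x10];
        let orr: Vec<u8> = a.iter().zip(b.iter()).map(|(x, y)| x | y).collect();
        println!("{:X?}", orr); // [FF, F, FF, 0, 36, FF, FF, 11]
    }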

Description

Bitwise inclusive OR (vector, register). This instruction performs a bitwise OR between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

ORR Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+result = operand1 OR operand2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise inclusive OR (vector, register). This instruction performs a bitwise OR between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

ORR Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+result = operand1 OR operand2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise inclusive OR (vector, register). This instruction performs a bitwise OR between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

ORR Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+result = operand1 OR operand2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise inclusive OR (vector, register). This instruction performs a bitwise OR between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

ORR Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+result = operand1 OR operand2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise inclusive OR (vector, register). This instruction performs a bitwise OR between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

ORR Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+result = operand1 OR operand2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise inclusive OR (vector, register). This instruction performs a bitwise OR between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

ORR Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+result = operand1 OR operand2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise inclusive OR (vector, register). This instruction performs a bitwise OR between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

ORR Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+result = operand1 OR operand2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise inclusive OR (vector, register). This instruction performs a bitwise OR between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

ORR Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+result = operand1 OR operand2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise inclusive OR (vector, register). This instruction performs a bitwise OR between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

ORR Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+result = operand1 OR operand2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise inclusive OR (vector, register). This instruction performs a bitwise OR between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

ORR Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+result = operand1 OR operand2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise inclusive OR (vector, register). This instruction performs a bitwise OR between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

ORR Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+result = operand1 OR operand2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise inclusive OR (vector, register). This instruction performs a bitwise OR between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

ORR Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+result = operand1 OR operand2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise inclusive OR (vector, register). This instruction performs a bitwise OR between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

ORR Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+result = operand1 OR operand2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise inclusive OR (vector, register). This instruction performs a bitwise OR between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

ORR Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+result = operand1 OR operand2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise inclusive OR (vector, register). This instruction performs a bitwise OR between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

ORR Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+result = operand1 OR operand2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise Exclusive OR (vector). This instruction performs a bitwise Exclusive OR operation between the two source SIMD&FP registers, and places the result in the destination SIMD&FP register.

+

A64 Instruction

EOR Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1;
+bits(datasize) operand2;
+bits(datasize) operand3;
+bits(datasize) operand4 = V[n];
+
+operand1 = V[m];
+operand2 = Zeros();
+operand3 = Ones();
+V[d] = operand1 EOR ((operand2 EOR operand4) AND operand3);
+

Supported architectures

v7/A32/A64
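
For illustration, a minimal Rust sketch of the 64-bit EOR form above, assuming veor_u8 from core::arch::aarch64 is the intrinsic that lowers to EOR Vd.8B,Vn.8B,Vm.8B; that mapping is an assumption of the sketch.

#[cfg(target_arch = "aarch64")]
fn eor_example() -> [u8; 8] {
    use core::arch::aarch64::*;
    unsafe {
        // Assumption for illustration: veor_u8 lowers to EOR Vd.8B,Vn.8B,Vm.8B.
        let a = vdup_n_u8(0b1111_0000);
        let b = vdup_n_u8(0b1010_1010);
        let r = veor_u8(a, b); // exclusive OR, lane by lane
        let mut out = [0u8; 8];
        vst1_u8(out.as_mut_ptr(), r); // each lane now holds 0b0101_1010
        out
    }
}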

Description

Bitwise Exclusive OR (vector). This instruction performs a bitwise Exclusive OR operation between the two source SIMD&FP registers, and places the result in the destination SIMD&FP register.

+

A64 Instruction

EOR Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1;
+bits(datasize) operand2;
+bits(datasize) operand3;
+bits(datasize) operand4 = V[n];
+
+operand1 = V[m];
+operand2 = Zeros();
+operand3 = Ones();
+V[d] = operand1 EOR ((operand2 EOR operand4) AND operand3);
+

Supported architectures

v7/A32/A64

Description

Bitwise Exclusive OR (vector). This instruction performs a bitwise Exclusive OR operation between the two source SIMD&FP registers, and places the result in the destination SIMD&FP register.

+

A64 Instruction

EOR Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1;
+bits(datasize) operand2;
+bits(datasize) operand3;
+bits(datasize) operand4 = V[n];
+
+operand1 = V[m];
+operand2 = Zeros();
+operand3 = Ones();
+V[d] = operand1 EOR ((operand2 EOR operand4) AND operand3);
+

Supported architectures

v7/A32/A64

Description

Bitwise Exclusive OR (vector). This instruction performs a bitwise Exclusive OR operation between the two source SIMD&FP registers, and places the result in the destination SIMD&FP register.

+

A64 Instruction

EOR Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1;
+bits(datasize) operand2;
+bits(datasize) operand3;
+bits(datasize) operand4 = V[n];
+
+operand1 = V[m];
+operand2 = Zeros();
+operand3 = Ones();
+V[d] = operand1 EOR ((operand2 EOR operand4) AND operand3);
+

Supported architectures

v7/A32/A64

Description

Bitwise Exclusive OR (vector). This instruction performs a bitwise Exclusive OR operation between the two source SIMD&FP registers, and places the result in the destination SIMD&FP register.

+

A64 Instruction

EOR Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1;
+bits(datasize) operand2;
+bits(datasize) operand3;
+bits(datasize) operand4 = V[n];
+
+operand1 = V[m];
+operand2 = Zeros();
+operand3 = Ones();
+V[d] = operand1 EOR ((operand2 EOR operand4) AND operand3);
+

Supported architectures

v7/A32/A64

Description

Bitwise Exclusive OR (vector). This instruction performs a bitwise Exclusive OR operation between the two source SIMD&FP registers, and places the result in the destination SIMD&FP register.

+

A64 Instruction

EOR Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1;
+bits(datasize) operand2;
+bits(datasize) operand3;
+bits(datasize) operand4 = V[n];
+
+operand1 = V[m];
+operand2 = Zeros();
+operand3 = Ones();
+V[d] = operand1 EOR ((operand2 EOR operand4) AND operand3);
+

Supported architectures

v7/A32/A64

Description

Bitwise Exclusive OR (vector). This instruction performs a bitwise Exclusive OR operation between the two source SIMD&FP registers, and places the result in the destination SIMD&FP register.

+

A64 Instruction

EOR Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1;
+bits(datasize) operand2;
+bits(datasize) operand3;
+bits(datasize) operand4 = V[n];
+
+operand1 = V[m];
+operand2 = Zeros();
+operand3 = Ones();
+V[d] = operand1 EOR ((operand2 EOR operand4) AND operand3);
+

Supported architectures

v7/A32/A64

Description

Bitwise Exclusive OR (vector). This instruction performs a bitwise Exclusive OR operation between the two source SIMD&FP registers, and places the result in the destination SIMD&FP register.

+

A64 Instruction

EOR Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1;
+bits(datasize) operand2;
+bits(datasize) operand3;
+bits(datasize) operand4 = V[n];
+
+operand1 = V[m];
+operand2 = Zeros();
+operand3 = Ones();
+V[d] = operand1 EOR ((operand2 EOR operand4) AND operand3);
+

Supported architectures

v7/A32/A64

Description

Bitwise Exclusive OR (vector). This instruction performs a bitwise Exclusive OR operation between the two source SIMD&FP registers, and places the result in the destination SIMD&FP register.

+

A64 Instruction

EOR Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1;
+bits(datasize) operand2;
+bits(datasize) operand3;
+bits(datasize) operand4 = V[n];
+
+operand1 = V[m];
+operand2 = Zeros();
+operand3 = Ones();
+V[d] = operand1 EOR ((operand2 EOR operand4) AND operand3);
+

Supported architectures

v7/A32/A64

Description

Bitwise Exclusive OR (vector). This instruction performs a bitwise Exclusive OR operation between the two source SIMD&FP registers, and places the result in the destination SIMD&FP register.

+

A64 Instruction

EOR Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1;
+bits(datasize) operand2;
+bits(datasize) operand3;
+bits(datasize) operand4 = V[n];
+
+operand1 = V[m];
+operand2 = Zeros();
+operand3 = Ones();
+V[d] = operand1 EOR ((operand2 EOR operand4) AND operand3);
+

Supported architectures

v7/A32/A64

Description

Bitwise Exclusive OR (vector). This instruction performs a bitwise Exclusive OR operation between the two source SIMD&FP registers, and places the result in the destination SIMD&FP register.

+

A64 Instruction

EOR Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1;
+bits(datasize) operand2;
+bits(datasize) operand3;
+bits(datasize) operand4 = V[n];
+
+operand1 = V[m];
+operand2 = Zeros();
+operand3 = Ones();
+V[d] = operand1 EOR ((operand2 EOR operand4) AND operand3);
+

Supported architectures

v7/A32/A64

Description

Bitwise Exclusive OR (vector). This instruction performs a bitwise Exclusive OR operation between the two source SIMD&FP registers, and places the result in the destination SIMD&FP register.

+

A64 Instruction

EOR Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1;
+bits(datasize) operand2;
+bits(datasize) operand3;
+bits(datasize) operand4 = V[n];
+
+operand1 = V[m];
+operand2 = Zeros();
+operand3 = Ones();
+V[d] = operand1 EOR ((operand2 EOR operand4) AND operand3);
+

Supported architectures

v7/A32/A64

Description

Bitwise Exclusive OR (vector). This instruction performs a bitwise Exclusive OR operation between the two source SIMD&FP registers, and places the result in the destination SIMD&FP register.

+

A64 Instruction

EOR Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1;
+bits(datasize) operand2;
+bits(datasize) operand3;
+bits(datasize) operand4 = V[n];
+
+operand1 = V[m];
+operand2 = Zeros();
+operand3 = Ones();
+V[d] = operand1 EOR ((operand2 EOR operand4) AND operand3);
+

Supported architectures

v7/A32/A64

Description

Bitwise Exclusive OR (vector). This instruction performs a bitwise Exclusive OR operation between the two source SIMD&FP registers, and places the result in the destination SIMD&FP register.

+

A64 Instruction

EOR Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1;
+bits(datasize) operand2;
+bits(datasize) operand3;
+bits(datasize) operand4 = V[n];
+
+operand1 = V[m];
+operand2 = Zeros();
+operand3 = Ones();
+V[d] = operand1 EOR ((operand2 EOR operand4) AND operand3);
+

Supported architectures

v7/A32/A64

Description

Bitwise Exclusive OR (vector). This instruction performs a bitwise Exclusive OR operation between the two source SIMD&FP registers, and places the result in the destination SIMD&FP register.

+

A64 Instruction

EOR Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1;
+bits(datasize) operand2;
+bits(datasize) operand3;
+bits(datasize) operand4 = V[n];
+
+operand1 = V[m];
+operand2 = Zeros();
+operand3 = Ones();
+V[d] = operand1 EOR ((operand2 EOR operand4) AND operand3);
+

Supported architectures

v7/A32/A64

Description

Bitwise Exclusive OR (vector). This instruction performs a bitwise Exclusive OR operation between the two source SIMD&FP registers, and places the result in the destination SIMD&FP register.

+

A64 Instruction

EOR Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1;
+bits(datasize) operand2;
+bits(datasize) operand3;
+bits(datasize) operand4 = V[n];
+
+operand1 = V[m];
+operand2 = Zeros();
+operand3 = Ones();
+V[d] = operand1 EOR ((operand2 EOR operand4) AND operand3);
+

Supported architectures

v7/A32/A64

Description

Bitwise bit Clear (vector, register). This instruction performs a bitwise AND between the first source SIMD&FP register and the complement of the second source SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

BIC Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+operand2 = NOT(operand2);
+
+result = operand1 AND operand2;
+V[d] = result;
+

Supported architectures

v7/A32/A64
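
For illustration, a minimal Rust sketch of the 64-bit BIC form above, assuming vbic_u8 from core::arch::aarch64 computes the first operand AND the complement of the second, as the Operation pseudocode describes.

#[cfg(target_arch = "aarch64")]
fn bic_example() -> [u8; 8] {
    use core::arch::aarch64::*;
    unsafe {
        // Assumption for illustration: vbic_u8(a, b) computes a AND NOT b,
        // i.e. the BIC Vd.8B,Vn.8B,Vm.8B form described above.
        let a = vdup_n_u8(0b1111_1111);
        let b = vdup_n_u8(0b0000_1111);
        let r = vbic_u8(a, b); // clears the bits that are set in b
        let mut out = [0u8; 8];
        vst1_u8(out.as_mut_ptr(), r); // each lane now holds 0b1111_0000
        out
    }
}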

Description

Bitwise bit Clear (vector, register). This instruction performs a bitwise AND between the first source SIMD&FP register and the complement of the second source SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

BIC Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+operand2 = NOT(operand2);
+
+result = operand1 AND operand2;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise bit Clear (vector, register). This instruction performs a bitwise AND between the first source SIMD&FP register and the complement of the second source SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

BIC Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+operand2 = NOT(operand2);
+
+result = operand1 AND operand2;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise bit Clear (vector, register). This instruction performs a bitwise AND between the first source SIMD&FP register and the complement of the second source SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

BIC Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+operand2 = NOT(operand2);
+
+result = operand1 AND operand2;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise bit Clear (vector, register). This instruction performs a bitwise AND between the first source SIMD&FP register and the complement of the second source SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

BIC Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+operand2 = NOT(operand2);
+
+result = operand1 AND operand2;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise bit Clear (vector, register). This instruction performs a bitwise AND between the first source SIMD&FP register and the complement of the second source SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

BIC Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+operand2 = NOT(operand2);
+
+result = operand1 AND operand2;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise bit Clear (vector, register). This instruction performs a bitwise AND between the first source SIMD&FP register and the complement of the second source SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

BIC Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+operand2 = NOT(operand2);
+
+result = operand1 AND operand2;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise bit Clear (vector, register). This instruction performs a bitwise AND between the first source SIMD&FP register and the complement of the second source SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

BIC Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+operand2 = NOT(operand2);
+
+result = operand1 AND operand2;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise bit Clear (vector, register). This instruction performs a bitwise AND between the first source SIMD&FP register and the complement of the second source SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

BIC Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+operand2 = NOT(operand2);
+
+result = operand1 AND operand2;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise bit Clear (vector, register). This instruction performs a bitwise AND between the first source SIMD&FP register and the complement of the second source SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

BIC Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+operand2 = NOT(operand2);
+
+result = operand1 AND operand2;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise bit Clear (vector, register). This instruction performs a bitwise AND between the first source SIMD&FP register and the complement of the second source SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

BIC Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+operand2 = NOT(operand2);
+
+result = operand1 AND operand2;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise bit Clear (vector, register). This instruction performs a bitwise AND between the first source SIMD&FP register and the complement of the second source SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

BIC Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+operand2 = NOT(operand2);
+
+result = operand1 AND operand2;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise bit Clear (vector, register). This instruction performs a bitwise AND between the first source SIMD&FP register and the complement of the second source SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

BIC Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+operand2 = NOT(operand2);
+
+result = operand1 AND operand2;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise bit Clear (vector, register). This instruction performs a bitwise AND between the first source SIMD&FP register and the complement of the second source SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

BIC Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+operand2 = NOT(operand2);
+
+result = operand1 AND operand2;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise bit Clear (vector, register). This instruction performs a bitwise AND between the first source SIMD&FP register and the complement of the second source SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

BIC Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+operand2 = NOT(operand2);
+
+result = operand1 AND operand2;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise bit Clear (vector, register). This instruction performs a bitwise AND between the first source SIMD&FP register and the complement of the second source SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

BIC Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+operand2 = NOT(operand2);
+
+result = operand1 AND operand2;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise inclusive OR NOT (vector). This instruction performs a bitwise OR NOT between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

ORN Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+operand2 = NOT(operand2);
+
+result = operand1 OR operand2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
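
For illustration, a minimal Rust sketch of the 64-bit ORN form above, assuming vorn_u8 from core::arch::aarch64 computes the first operand OR the complement of the second; the intrinsic choice is an assumption of the sketch.

#[cfg(target_arch = "aarch64")]
fn orn_example() -> [u8; 8] {
    use core::arch::aarch64::*;
    unsafe {
        // Assumption for illustration: vorn_u8(a, b) computes a OR NOT b,
        // matching the ORN Vd.8B,Vn.8B,Vm.8B form described above.
        let a = vdup_n_u8(0b0000_0001);
        let b = vdup_n_u8(0b0000_1111);
        let r = vorn_u8(a, b); // 0b0000_0001 | !0b0000_1111
        let mut out = [0u8; 8];
        vst1_u8(out.as_mut_ptr(), r); // each lane now holds 0b1111_0001
        out
    }
}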

Description

Bitwise inclusive OR NOT (vector). This instruction performs a bitwise OR NOT between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

ORN Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+operand2 = NOT(operand2);
+
+result = operand1 OR operand2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise inclusive OR NOT (vector). This instruction performs a bitwise OR NOT between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

ORN Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+operand2 = NOT(operand2);
+
+result = operand1 OR operand2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise inclusive OR NOT (vector). This instruction performs a bitwise OR NOT between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

ORN Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+operand2 = NOT(operand2);
+
+result = operand1 OR operand2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise inclusive OR NOT (vector). This instruction performs a bitwise OR NOT between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

ORN Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+operand2 = NOT(operand2);
+
+result = operand1 OR operand2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise inclusive OR NOT (vector). This instruction performs a bitwise OR NOT between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

ORN Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+operand2 = NOT(operand2);
+
+result = operand1 OR operand2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise inclusive OR NOT (vector). This instruction performs a bitwise OR NOT between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

ORN Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+operand2 = NOT(operand2);
+
+result = operand1 OR operand2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise inclusive OR NOT (vector). This instruction performs a bitwise OR NOT between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

ORN Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+operand2 = NOT(operand2);
+
+result = operand1 OR operand2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise inclusive OR NOT (vector). This instruction performs a bitwise OR NOT between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

ORN Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+operand2 = NOT(operand2);
+
+result = operand1 OR operand2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise inclusive OR NOT (vector). This instruction performs a bitwise OR NOT between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

ORN Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+operand2 = NOT(operand2);
+
+result = operand1 OR operand2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise inclusive OR NOT (vector). This instruction performs a bitwise OR NOT between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

ORN Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+operand2 = NOT(operand2);
+
+result = operand1 OR operand2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise inclusive OR NOT (vector). This instruction performs a bitwise OR NOT between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

ORN Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+operand2 = NOT(operand2);
+
+result = operand1 OR operand2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise inclusive OR NOT (vector). This instruction performs a bitwise OR NOT between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

ORN Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+operand2 = NOT(operand2);
+
+result = operand1 OR operand2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise inclusive OR NOT (vector). This instruction performs a bitwise OR NOT between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

ORN Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+operand2 = NOT(operand2);
+
+result = operand1 OR operand2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise inclusive OR NOT (vector). This instruction performs a bitwise OR NOT between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

ORN Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+operand2 = NOT(operand2);
+
+result = operand1 OR operand2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise inclusive OR NOT (vector). This instruction performs a bitwise OR NOT between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

ORN Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+operand2 = NOT(operand2);
+
+result = operand1 OR operand2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise Select. This instruction sets each bit in the destination SIMD&FP register to the corresponding bit from the first source SIMD&FP register when the original destination bit was 1, otherwise from the second source SIMD&FP register.

+

A64 Instruction

BSL Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vd.8B 
+b → Vn.8B
+c → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1;
+bits(datasize) operand3;
+bits(datasize) operand4 = V[n];
+
+operand1 = V[m];
+operand3 = V[d];
+V[d] = operand1 EOR ((operand1 EOR operand4) AND operand3);
+

Supported architectures

v7/A32/A64
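
For illustration, a minimal Rust sketch of the 64-bit BSL form above, assuming vbsl_u8 from core::arch::aarch64 takes the selection mask as its first argument, mirroring the mask that BSL reads from the destination register; that mapping is an assumption of the sketch.

#[cfg(target_arch = "aarch64")]
fn bsl_example() -> [u8; 8] {
    use core::arch::aarch64::*;
    unsafe {
        // Assumption for illustration: vbsl_u8(mask, b, c) maps onto
        // BSL Vd.8B,Vn.8B,Vm.8B with the mask pre-loaded into Vd.
        let mask = vdup_n_u8(0b1111_0000);
        let b = vdup_n_u8(0xAA); // taken where the mask bit is 1
        let c = vdup_n_u8(0x55); // taken where the mask bit is 0
        let r = vbsl_u8(mask, b, c);
        let mut out = [0u8; 8];
        vst1_u8(out.as_mut_ptr(), r); // each lane now holds 0b1010_0101
        out
    }
}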

Description

Bitwise Select. This instruction sets each bit in the destination SIMD&FP register to the corresponding bit from the first source SIMD&FP register when the original destination bit was 1, otherwise from the second source SIMD&FP register.

+

A64 Instruction

BSL Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vd.16B 
+b → Vn.16B
+c → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1;
+bits(datasize) operand3;
+bits(datasize) operand4 = V[n];
+
+operand1 = V[m];
+operand3 = V[d];
+V[d] = operand1 EOR ((operand1 EOR operand4) AND operand3);
+

Supported architectures

v7/A32/A64

Description

Bitwise Select. This instruction sets each bit in the destination SIMD&FP register to the corresponding bit from the first source SIMD&FP register when the original destination bit was 1, otherwise from the second source SIMD&FP register.

+

A64 Instruction

BSL Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vd.8B 
+b → Vn.8B
+c → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1;
+bits(datasize) operand3;
+bits(datasize) operand4 = V[n];
+
+operand1 = V[m];
+operand3 = V[d];
+V[d] = operand1 EOR ((operand1 EOR operand4) AND operand3);
+

Supported architectures

v7/A32/A64

Description

Bitwise Select. This instruction sets each bit in the destination SIMD&FP register to the corresponding bit from the first source SIMD&FP register when the original destination bit was 1, otherwise from the second source SIMD&FP register.

+

A64 Instruction

BSL Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vd.16B 
+b → Vn.16B
+c → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1;
+bits(datasize) operand3;
+bits(datasize) operand4 = V[n];
+
+operand1 = V[m];
+operand3 = V[d];
+V[d] = operand1 EOR ((operand1 EOR operand4) AND operand3);
+

Supported architectures

v7/A32/A64

Description

Bitwise Select. This instruction sets each bit in the destination SIMD&FP register to the corresponding bit from the first source SIMD&FP register when the original destination bit was 1, otherwise from the second source SIMD&FP register.

+

A64 Instruction

BSL Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vd.8B 
+b → Vn.8B
+c → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1;
+bits(datasize) operand3;
+bits(datasize) operand4 = V[n];
+
+operand1 = V[m];
+operand3 = V[d];
+V[d] = operand1 EOR ((operand1 EOR operand4) AND operand3);
+

Supported architectures

v7/A32/A64

Description

Bitwise Select. This instruction sets each bit in the destination SIMD&FP register to the corresponding bit from the first source SIMD&FP register when the original destination bit was 1, otherwise from the second source SIMD&FP register.

+

A64 Instruction

BSL Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vd.16B 
+b → Vn.16B
+c → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1;
+bits(datasize) operand3;
+bits(datasize) operand4 = V[n];
+
+operand1 = V[m];
+operand3 = V[d];
+V[d] = operand1 EOR ((operand1 EOR operand4) AND operand3);
+

Supported architectures

v7/A32/A64

Description

Bitwise Select. This instruction sets each bit in the destination SIMD&FP register to the corresponding bit from the first source SIMD&FP register when the original destination bit was 1, otherwise from the second source SIMD&FP register.

+

A64 Instruction

BSL Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vd.8B 
+b → Vn.8B
+c → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1;
+bits(datasize) operand3;
+bits(datasize) operand4 = V[n];
+
+operand1 = V[m];
+operand3 = V[d];
+V[d] = operand1 EOR ((operand1 EOR operand4) AND operand3);
+

Supported architectures

v7/A32/A64

Description

Bitwise Select. This instruction sets each bit in the destination SIMD&FP register to the corresponding bit from the first source SIMD&FP register when the original destination bit was 1, otherwise from the second source SIMD&FP register.

+

A64 Instruction

BSL Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vd.16B 
+b → Vn.16B
+c → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1;
+bits(datasize) operand3;
+bits(datasize) operand4 = V[n];
+
+operand1 = V[m];
+operand3 = V[d];
+V[d] = operand1 EOR ((operand1 EOR operand4) AND operand3);
+

Supported architectures

v7/A32/A64

Description

Bitwise Select. This instruction sets each bit in the destination SIMD&FP register to the corresponding bit from the first source SIMD&FP register when the original destination bit was 1, otherwise from the second source SIMD&FP register.

+

A64 Instruction

BSL Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vd.8B 
+b → Vn.8B
+c → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1;
+bits(datasize) operand3;
+bits(datasize) operand4 = V[n];
+
+operand1 = V[m];
+operand3 = V[d];
+V[d] = operand1 EOR ((operand1 EOR operand4) AND operand3);
+

Supported architectures

v7/A32/A64

Description

Bitwise Select. This instruction sets each bit in the destination SIMD&FP register to the corresponding bit from the first source SIMD&FP register when the original destination bit was 1, otherwise from the second source SIMD&FP register.

+

A64 Instruction

BSL Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vd.16B 
+b → Vn.16B
+c → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1;
+bits(datasize) operand3;
+bits(datasize) operand4 = V[n];
+
+operand1 = V[m];
+operand3 = V[d];
+V[d] = operand1 EOR ((operand1 EOR operand4) AND operand3);
+

Supported architectures

v7/A32/A64

Description

Bitwise Select. This instruction sets each bit in the destination SIMD&FP register to the corresponding bit from the first source SIMD&FP register when the original destination bit was 1, otherwise from the second source SIMD&FP register.

+

A64 Instruction

BSL Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vd.8B 
+b → Vn.8B
+c → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1;
+bits(datasize) operand3;
+bits(datasize) operand4 = V[n];
+
+operand1 = V[m];
+operand3 = V[d];
+V[d] = operand1 EOR ((operand1 EOR operand4) AND operand3);
+

Supported architectures

v7/A32/A64

Description

Bitwise Select. This instruction sets each bit in the destination SIMD&FP register to the corresponding bit from the first source SIMD&FP register when the original destination bit was 1, otherwise from the second source SIMD&FP register.

+

A64 Instruction

BSL Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vd.16B 
+b → Vn.16B
+c → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1;
+bits(datasize) operand3;
+bits(datasize) operand4 = V[n];
+
+operand1 = V[m];
+operand3 = V[d];
+V[d] = operand1 EOR ((operand1 EOR operand4) AND operand3);
+

Supported architectures

v7/A32/A64

Description

Bitwise Select. This instruction sets each bit in the destination SIMD&FP register to the corresponding bit from the first source SIMD&FP register when the original destination bit was 1, otherwise from the second source SIMD&FP register.

+

A64 Instruction

BSL Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vd.8B 
+b → Vn.8B
+c → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1;
+bits(datasize) operand3;
+bits(datasize) operand4 = V[n];
+
+operand1 = V[m];
+operand3 = V[d];
+V[d] = operand1 EOR ((operand1 EOR operand4) AND operand3);
+

Supported architectures

v7/A32/A64

Description

Bitwise Select. This instruction sets each bit in the destination SIMD&FP register to the corresponding bit from the first source SIMD&FP register when the original destination bit was 1, otherwise from the second source SIMD&FP register.

+

A64 Instruction

BSL Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vd.16B 
+b → Vn.16B
+c → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1;
+bits(datasize) operand3;
+bits(datasize) operand4 = V[n];
+
+operand1 = V[m];
+operand3 = V[d];
+V[d] = operand1 EOR ((operand1 EOR operand4) AND operand3);
+

Supported architectures

v7/A32/A64

Description

Bitwise Select. This instruction sets each bit in the destination SIMD&FP register to the corresponding bit from the first source SIMD&FP register when the original destination bit was 1, otherwise from the second source SIMD&FP register.

+

A64 Instruction

BSL Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vd.8B 
+b → Vn.8B
+c → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1;
+bits(datasize) operand3;
+bits(datasize) operand4 = V[n];
+
+operand1 = V[m];
+operand3 = V[d];
+V[d] = operand1 EOR ((operand1 EOR operand4) AND operand3);
+

Supported architectures

v7/A32/A64

Description

Bitwise Select. This instruction sets each bit in the destination SIMD&FP register to the corresponding bit from the first source SIMD&FP register when the original destination bit was 1, otherwise from the second source SIMD&FP register.

+

A64 Instruction

BSL Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vd.16B 
+b → Vn.16B
+c → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1;
+bits(datasize) operand3;
+bits(datasize) operand4 = V[n];
+
+operand1 = V[m];
+operand3 = V[d];
+V[d] = operand1 EOR ((operand1 EOR operand4) AND operand3);
+

Supported architectures

v7/A32/A64

Description

Bitwise Select. This instruction sets each bit in the destination SIMD&FP register to the corresponding bit from the first source SIMD&FP register when the original destination bit was 1, otherwise from the second source SIMD&FP register.

+

A64 Instruction

BSL Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vd.8B 
+b → Vn.8B
+c → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1;
+bits(datasize) operand3;
+bits(datasize) operand4 = V[n];
+
+operand1 = V[m];
+operand3 = V[d];
+V[d] = operand1 EOR ((operand1 EOR operand4) AND operand3);
+

Supported architectures

A32/A64

Description

Bitwise Select. This instruction sets each bit in the destination SIMD&FP register to the corresponding bit from the first source SIMD&FP register when the original destination bit was 1, otherwise from the second source SIMD&FP register.

+

A64 Instruction

BSL Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vd.16B 
+b → Vn.16B
+c → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1;
+bits(datasize) operand3;
+bits(datasize) operand4 = V[n];
+
+operand1 = V[m];
+operand3 = V[d];
+V[d] = operand1 EOR ((operand1 EOR operand4) AND operand3);
+

Supported architectures

A32/A64

Description

Bitwise Select. This instruction sets each bit in the destination SIMD&FP register to the corresponding bit from the first source SIMD&FP register when the original destination bit was 1, otherwise from the second source SIMD&FP register.

+

A64 Instruction

BSL Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vd.8B 
+b → Vn.8B
+c → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1;
+bits(datasize) operand3;
+bits(datasize) operand4 = V[n];
+
+operand1 = V[m];
+operand3 = V[d];
+V[d] = operand1 EOR ((operand1 EOR operand4) AND operand3);
+

Supported architectures

v7/A32/A64

Description

Bitwise Select. This instruction sets each bit in the destination SIMD&FP register to the corresponding bit from the first source SIMD&FP register when the original destination bit was 1, otherwise from the second source SIMD&FP register.

+

A64 Instruction

BSL Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vd.16B 
+b → Vn.16B
+c → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1;
+bits(datasize) operand3;
+bits(datasize) operand4 = V[n];
+
+operand1 = V[m];
+operand3 = V[d];
+V[d] = operand1 EOR ((operand1 EOR operand4) AND operand3);
+

Supported architectures

v7/A32/A64

Description

Bitwise Select. This instruction sets each bit in the destination SIMD&FP register to the corresponding bit from the first source SIMD&FP register when the original destination bit was 1, otherwise from the second source SIMD&FP register.

+

A64 Instruction

BSL Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vd.8B 
+b → Vn.8B
+c → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1;
+bits(datasize) operand3;
+bits(datasize) operand4 = V[n];
+
+operand1 = V[m];
+operand3 = V[d];
+V[d] = operand1 EOR ((operand1 EOR operand4) AND operand3);
+

Supported architectures

v7/A32/A64

Description

Bitwise Select. This instruction sets each bit in the destination SIMD&FP register to the corresponding bit from the first source SIMD&FP register when the original destination bit was 1, otherwise from the second source SIMD&FP register.

+

A64 Instruction

BSL Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vd.16B 
+b → Vn.16B
+c → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1;
+bits(datasize) operand3;
+bits(datasize) operand4 = V[n];
+
+operand1 = V[m];
+operand3 = V[d];
+V[d] = operand1 EOR ((operand1 EOR operand4) AND operand3);
+

Supported architectures

v7/A32/A64

Description

Bitwise Select. This instruction sets each bit in the destination SIMD&FP register to the corresponding bit from the first source SIMD&FP register when the original destination bit was 1, otherwise from the second source SIMD&FP register.

+

A64 Instruction

BSL Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vd.8B 
+b → Vn.8B
+c → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1;
+bits(datasize) operand3;
+bits(datasize) operand4 = V[n];
+
+operand1 = V[m];
+operand3 = V[d];
+V[d] = operand1 EOR ((operand1 EOR operand4) AND operand3);
+

Supported architectures

v7/A32/A64

Description

Bitwise Select. This instruction sets each bit in the destination SIMD&FP register to the corresponding bit from the first source SIMD&FP register when the original destination bit was 1, otherwise from the second source SIMD&FP register.

+

A64 Instruction

BSL Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vd.16B 
+b → Vn.16B
+c → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1;
+bits(datasize) operand3;
+bits(datasize) operand4 = V[n];
+
+operand1 = V[m];
+operand3 = V[d];
+V[d] = operand1 EOR ((operand1 EOR operand4) AND operand3);
+

Supported architectures

v7/A32/A64

Description

Bitwise Select. This instruction sets each bit in the destination SIMD&FP register to the corresponding bit from the first source SIMD&FP register when the original destination bit was 1, otherwise from the second source SIMD&FP register.

+

A64 Instruction

BSL Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vd.8B 
+b → Vn.8B
+c → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1;
+bits(datasize) operand3;
+bits(datasize) operand4 = V[n];
+
+operand1 = V[m];
+operand3 = V[d];
+V[d] = operand1 EOR ((operand1 EOR operand4) AND operand3);
+

Supported architectures

A64

Description

Bitwise Select. This instruction sets each bit in the destination SIMD&FP register to the corresponding bit from the first source SIMD&FP register when the original destination bit was 1, otherwise from the second source SIMD&FP register.

+

A64 Instruction

BSL Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vd.16B 
+b → Vn.16B
+c → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1;
+bits(datasize) operand3;
+bits(datasize) operand4 = V[n];
+
+operand1 = V[m];
+operand3 = V[d];
+V[d] = operand1 EOR ((operand1 EOR operand4) AND operand3);
+

Supported architectures

A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.B[lane1],Vn.B[lane2]
+

Argument Preparation

a → Vd.8B 
+0 <= lane1 <= 7
+b → Vn.8B
+0 <= lane2 <= 7

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A64
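
For illustration, a minimal Rust sketch in the spirit of this entry's Operation pseudocode (insert one element and leave the other lanes untouched); the use of vset_lane_u8 from core::arch::aarch64 is an assumption of the sketch, not an intrinsic named by this entry.

#[cfg(target_arch = "aarch64")]
fn ins_example() -> [u8; 8] {
    use core::arch::aarch64::*;
    unsafe {
        // Assumption for illustration: vset_lane_u8 inserts a scalar into one
        // lane of a vector while preserving the remaining lanes, which is the
        // behaviour the Operation pseudocode above describes.
        let v = vdup_n_u8(0);
        let r = vset_lane_u8::<3>(0xFF, v); // write 0xFF into lane 3
        let mut out = [0u8; 8];
        vst1_u8(out.as_mut_ptr(), r); // out == [0, 0, 0, 0xFF, 0, 0, 0, 0]
        out
    }
}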

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.B[lane1],Vn.B[lane2]
+

Argument Preparation

a → Vd.16B 
+0 <= lane1 <= 15
+b → Vn.8B
+0 <= lane2 <= 7

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.H[lane1],Vn.H[lane2]
+

Argument Preparation

a → Vd.4H 
+0 <= lane1 <= 3
+b → Vn.4H
+0 <= lane2 <= 3

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.H[lane1],Vn.H[lane2]
+

Argument Preparation

a → Vd.8H 
+0 <= lane1 <= 7
+b → Vn.4H
+0 <= lane2 <= 3

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.S[lane1],Vn.S[lane2]
+

Argument Preparation

a → Vd.2S 
+0 <= lane1 <= 1
+b → Vn.2S
+0 <= lane2 <= 1

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.S[lane1],Vn.S[lane2]
+

Argument Preparation

a → Vd.4S 
+0 <= lane1 <= 3
+b → Vn.2S
+0 <= lane2 <= 1

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Dd,Vn.D[lane2]
+

Argument Preparation

a → UNUSED 
+0 <= lane1 <= 0
+b → Vn.1D
+0 <= lane2 <= 0

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64
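
For illustration, a minimal Rust sketch in the spirit of this entry's description (duplicate a general-purpose value into a SIMD&FP register); the use of vdup_n_u64 from core::arch::aarch64 is an assumption of the sketch, and the instruction text shown above is the element-indexed DUP form.

#[cfg(target_arch = "aarch64")]
fn dup_example() -> u64 {
    use core::arch::aarch64::*;
    unsafe {
        // Assumption for illustration: vdup_n_u64 broadcasts a general-purpose
        // value into a 64-bit SIMD&FP register, as the description above outlines.
        let v = vdup_n_u64(0xDEAD_BEEF_DEAD_BEEF);
        let mut out = [0u64; 1];
        vst1_u64(out.as_mut_ptr(), v);
        out[0]
    }
}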

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.D[lane1],Vn.D[lane2]
+

Argument Preparation

a → Vd.2D 
+0 <= lane1 <= 1
+b → Vn.1D
+0 <= lane2 <= 0

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.B[lane1],Vn.B[lane2]
+

Argument Preparation

a → Vd.8B 
+0 <= lane1 <= 7
+b → Vn.8B
+0 <= lane2 <= 7

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.B[lane1],Vn.B[lane2]
+

Argument Preparation

a → Vd.16B 
+0 <= lane1 <= 15
+b → Vn.8B
+0 <= lane2 <= 7

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.H[lane1],Vn.H[lane2]
+

Argument Preparation

a → Vd.4H 
+0 <= lane1 <= 3
+b → Vn.4H
+0 <= lane2 <= 3

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.H[lane1],Vn.H[lane2]
+

Argument Preparation

a → Vd.8H 
+0 <= lane1 <= 7
+b → Vn.4H
+0 <= lane2 <= 3

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.S[lane1],Vn.S[lane2]
+

Argument Preparation

a → Vd.2S 
+0 <= lane1 <= 1
+b → Vn.2S
+0 <= lane2 <= 1

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.S[lane1],Vn.S[lane2]
+

Argument Preparation

a → Vd.4S 
+0 <= lane1 <= 3
+b → Vn.2S
+0 <= lane2 <= 1

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Dd,Vn.D[lane2]
+

Argument Preparation

a → UNUSED 
+0 <= lane1 <= 0
+b → Vn.1D
+0 <= lane2 <= 0

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.D[lane1],Vn.D[lane2]
+

Argument Preparation

a → Vd.2D 
+0 <= lane1 <= 1
+b → Vn.1D
+0 <= lane2 <= 0

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Dd,Vn.D[lane2]
+

Argument Preparation

a → UNUSED 
+0 <= lane1 <= 0
+b → Vn.1D
+0 <= lane2 <= 0

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.D[lane1],Vn.D[lane2]
+

Argument Preparation

a → Vd.2D 
+0 <= lane1 <= 1
+b → Vn.1D
+0 <= lane2 <= 0

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.S[lane1],Vn.S[lane2]
+

Argument Preparation

a → Vd.2S 
+0 << lane1 << 1
+b → Vn.2S
+0 << lane2 << 1

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.S[lane1],Vn.S[lane2]
+

Argument Preparation

a → Vd.4S 
+0 << lane1 << 3
+b → Vn.2S
+0 << lane2 << 1

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+
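
The "Argument Preparation" lines in the entry above restrict the lane immediates: lane1 ranges over the four S lanes of the `Vd.4S` destination and lane2 over the two S lanes of the `Vn.2S` source. The sketch below shows one hedged way such bounds could be expressed in Rust, passing the lanes as const generics and asserting the documented ranges; the helper name and signature are hypothetical, not something this patch defines.

```rust
// Hypothetical helper mirroring the entry above: lane1 selects one of the
// four S lanes of the destination, lane2 one of the two S lanes of the
// source. The bounds from "Argument Preparation" become assertions here.
fn ins_s_lane<const LANE1: usize, const LANE2: usize>(
    a: [u32; 4], // Vd.4S
    b: [u32; 2], // Vn.2S
) -> [u32; 4] {
    assert!(LANE1 < 4 && LANE2 < 2, "lane immediate out of range");
    let mut result = a;
    result[LANE1] = b[LANE2];
    result
}

fn main() {
    assert_eq!(ins_s_lane::<3, 1>([0; 4], [7, 9]), [0, 0, 0, 9]);
}
```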

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Dd,Vn.D[lane2]
+

Argument Preparation

a → UNUSED 
+0 << lane1 << 0
+b → Vn.1D
+0 << lane2 << 0

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.D[lane1],Vn.D[lane2]
+

Argument Preparation

a → Vd.2D 
+0 << lane1 << 1
+b → Vn.1D
+0 << lane2 << 0

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.B[lane1],Vn.B[lane2]
+

Argument Preparation

a → Vd.8B 
+0 << lane1 << 7
+b → Vn.8B
+0 << lane2 << 7

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.B[lane1],Vn.B[lane2]
+

Argument Preparation

a → Vd.16B 
+0 << lane1 << 15
+b → Vn.8B
+0 << lane2 << 7

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.H[lane1],Vn.H[lane2]
+

Argument Preparation

a → Vd.4H 
+0 << lane1 << 3
+b → Vn.4H
+0 << lane2 << 3

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.H[lane1],Vn.H[lane2]
+

Argument Preparation

a → Vd.8H 
+0 << lane1 << 7
+b → Vn.4H
+0 << lane2 << 3

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.B[lane1],Vn.B[lane2]
+

Argument Preparation

a → Vd.8B 
+0 << lane1 << 7
+b → Vn.16B
+0 << lane2 << 15

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.B[lane1],Vn.B[lane2]
+

Argument Preparation

a → Vd.16B 
+0 << lane1 << 15
+b → Vn.16B
+0 << lane2 << 15

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.H[lane1],Vn.H[lane2]
+

Argument Preparation

a → Vd.4H 
+0 << lane1 << 3
+b → Vn.8H
+0 << lane2 << 7

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.H[lane1],Vn.H[lane2]
+

Argument Preparation

a → Vd.8H 
+0 << lane1 << 7
+b → Vn.8H
+0 << lane2 << 7

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.S[lane1],Vn.S[lane2]
+

Argument Preparation

a → Vd.2S 
+0 << lane1 << 1
+b → Vn.4S
+0 << lane2 << 3

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.S[lane1],Vn.S[lane2]
+

Argument Preparation

a → Vd.4S 
+0 << lane1 << 3
+b → Vn.4S
+0 << lane2 << 3

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Dd,Vn.D[lane2]
+

Argument Preparation

a → UNUSED 
+0 << lane1 << 0
+b → Vn.2D
+0 << lane2 << 1

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+
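
For the scalar `Dd` destination in the entry above there is only a single 64-bit element, so the duplication loop collapses to copying the selected lane of `Vn.2D` into the result. A minimal plain-Rust sketch of that reading of the entry follows; the two-lane model and the function name are illustrative, not part of the patch.

```rust
// Model of DUP Dd, Vn.D[lane2] with esize = 64: the scalar destination
// holds exactly one element, so duplication reduces to selecting lane2.
fn dup_d_from_lane(v_n: [u64; 2], lane2: usize) -> u64 {
    assert!(lane2 < 2, "lane index out of range");
    v_n[lane2]
}

fn main() {
    assert_eq!(dup_d_from_lane([10, 20], 1), 20);
}
```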

Supported architectures

A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.D[lane1],Vn.D[lane2]
+

Argument Preparation

a → Vd.2D 
+0 << lane1 << 1
+b → Vn.2D
+0 << lane2 << 1

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.B[lane1],Vn.B[lane2]
+

Argument Preparation

a → Vd.8B 
+0 << lane1 << 7
+b → Vn.16B
+0 << lane2 << 15

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.B[lane1],Vn.B[lane2]
+

Argument Preparation

a → Vd.16B 
+0 << lane1 << 15
+b → Vn.16B
+0 << lane2 << 15

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.H[lane1],Vn.H[lane2]
+

Argument Preparation

a → Vd.4H 
+0 << lane1 << 3
+b → Vn.8H
+0 << lane2 << 7

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.H[lane1],Vn.H[lane2]
+

Argument Preparation

a → Vd.8H 
+0 << lane1 << 7
+b → Vn.8H
+0 << lane2 << 7

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.S[lane1],Vn.S[lane2]
+

Argument Preparation

a → Vd.2S 
+0 << lane1 << 1
+b → Vn.4S
+0 << lane2 << 3

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.S[lane1],Vn.S[lane2]
+

Argument Preparation

a → Vd.4S 
+0 << lane1 << 3
+b → Vn.4S
+0 << lane2 << 3

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Dd,Vn.D[lane2]
+

Argument Preparation

a → UNUSED 
+0 << lane1 << 0
+b → Vn.2D
+0 << lane2 << 1

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.D[lane1],Vn.D[lane2]
+

Argument Preparation

a → Vd.2D 
+0 << lane1 << 1
+b → Vn.2D
+0 << lane2 << 1

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Dd,Vn.D[lane2]
+

Argument Preparation

a → UNUSED 
+0 << lane1 << 0
+b → Vn.2D
+0 << lane2 << 1

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.D[lane1],Vn.D[lane2]
+

Argument Preparation

a → Vd.2D 
+0 << lane1 << 1
+b → Vn.2D
+0 << lane2 << 1

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.S[lane1],Vn.S[lane2]
+

Argument Preparation

a → Vd.2S 
+0 << lane1 << 1
+b → Vn.4S
+0 << lane2 << 3

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.S[lane1],Vn.S[lane2]
+

Argument Preparation

a → Vd.4S 
+0 << lane1 << 3
+b → Vn.4S
+0 << lane2 << 3

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Dd,Vn.D[lane2]
+

Argument Preparation

a → UNUSED 
+0 << lane1 << 0
+b → Vn.2D
+0 << lane2 << 1

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.D[lane1],Vn.D[lane2]
+

Argument Preparation

a → Vd.2D 
+0 << lane1 << 1
+b → Vn.2D
+0 << lane2 << 1

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.B[lane1],Vn.B[lane2]
+

Argument Preparation

a → Vd.8B 
+0 << lane1 << 7
+b → Vn.16B
+0 << lane2 << 15

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.B[lane1],Vn.B[lane2]
+

Argument Preparation

a → Vd.16B 
+0 << lane1 << 15
+b → Vn.16B
+0 << lane2 << 15

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.H[lane1],Vn.H[lane2]
+

Argument Preparation

a → Vd.4H 
+0 << lane1 << 3
+b → Vn.8H
+0 << lane2 << 7

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.H[lane1],Vn.H[lane2]
+

Argument Preparation

a → Vd.8H 
+0 << lane1 << 7
+b → Vn.8H
+0 << lane2 << 7

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Reverse Bit order (vector). This instruction reads each vector element from the source SIMD&FP register, reverses the bits of the element, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

RBIT Vd.8B,Vn.8B
+

Argument Preparation

a → Vn.8B 

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+bits(esize) rev;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    for i = 0 to esize-1
+        rev<esize-1-i> = element<i>;
+    Elem[result, e, esize] = rev;
+
+V[d] = result;
+
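
The nested loop above reverses the bit order inside each 8-bit element independently; it does not reverse the byte order of the vector. A small plain-Rust model of that inner computation is given below, cross-checked against `u8::reverse_bits` from the core library; the function name is illustrative only.

```rust
// Plain-Rust model of the RBIT Vd.8B, Vn.8B pseudocode: for each 8-bit
// element, bit i of the source becomes bit (esize-1-i) of the result.
fn rbit_8b(operand: [u8; 8]) -> [u8; 8] {
    let mut result = [0u8; 8];
    for e in 0..8 {
        let element = operand[e];
        let mut rev = 0u8;
        for i in 0..8 {
            rev |= ((element >> i) & 1) << (7 - i); // rev<7-i> = element<i>
        }
        // The core library computes the same thing:
        debug_assert_eq!(rev, element.reverse_bits());
        result[e] = rev;
    }
    result
}

fn main() {
    assert_eq!(rbit_8b([0b0000_0001; 8])[0], 0b1000_0000);
}
```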

Supported architectures

A64

Description

Reverse Bit order (vector). This instruction reads each vector element from the source SIMD&FP register, reverses the bits of the element, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

RBIT Vd.16B,Vn.16B
+

Argument Preparation

a → Vn.16B 

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+bits(esize) rev;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    for i = 0 to esize-1
+        rev<esize-1-i> = element<i>;
+    Elem[result, e, esize] = rev;
+
+V[d] = result;
+

Supported architectures

A64

Description

Reverse Bit order (vector). This instruction reads each vector element from the source SIMD&FP register, reverses the bits of the element, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

RBIT Vd.8B,Vn.8B
+

Argument Preparation

a → Vn.8B 

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+bits(esize) rev;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    for i = 0 to esize-1
+        rev<esize-1-i> = element<i>;
+    Elem[result, e, esize] = rev;
+
+V[d] = result;
+

Supported architectures

A64

Description

Reverse Bit order (vector). This instruction reads each vector element from the source SIMD&FP register, reverses the bits of the element, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

RBIT Vd.16B,Vn.16B
+

Argument Preparation

a → Vn.16B 

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+bits(esize) rev;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    for i = 0 to esize-1
+        rev<esize-1-i> = element<i>;
+    Elem[result, e, esize] = rev;
+
+V[d] = result;
+

Supported architectures

A64

Description

Reverse Bit order (vector). This instruction reads each vector element from the source SIMD&FP register, reverses the bits of the element, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

RBIT Vd.8B,Vn.8B
+

Argument Preparation

a → Vn.8B 

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+bits(esize) rev;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    for i = 0 to esize-1
+        rev<esize-1-i> = element<i>;
+    Elem[result, e, esize] = rev;
+
+V[d] = result;
+

Supported architectures

A64

Description

Reverse Bit order (vector). This instruction reads each vector element from the source SIMD&FP register, reverses the bits of the element, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

RBIT Vd.16B,Vn.16B
+

Argument Preparation

a → Vn.16B 

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+bits(esize) rev;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    for i = 0 to esize-1
+        rev<esize-1-i> = element<i>;
+    Elem[result, e, esize] = rev;
+
+V[d] = result;
+

Supported architectures

A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.D[0],Xn
+

Argument Preparation

a → Xn 

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+
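
In the `INS Vd.D[0],Xn` form above, the 64-bit general-purpose value becomes the low half of the vector, and the `Vd.8B` result is simply that half viewed as eight byte lanes. A minimal plain-Rust sketch of that reading, assuming little-endian lane numbering for the model; the function name is illustrative.

```rust
// Model of INS Vd.D[0], Xn: the 64-bit general-purpose value fills the
// low half of the vector, then the Vd.8B result views it as 8 x u8.
fn ins_d0_from_gpr(x_n: u64) -> [u8; 8] {
    x_n.to_le_bytes() // Elem[result, 0, 64] = element; reinterpreted as bytes
}

fn main() {
    assert_eq!(ins_d0_from_gpr(0x0102_0304_0506_0708)[0], 0x08);
}
```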

Supported architectures

v7/A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.D[0],Xn
+

Argument Preparation

a → Xn 

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.D[0],Xn
+

Argument Preparation

a → Xn 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.D[0],Xn
+

Argument Preparation

a → Xn 

Results

Vd.1D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.D[0],Xn
+

Argument Preparation

a → Xn 

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.D[0],Xn
+

Argument Preparation

a → Xn 

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.D[0],Xn
+

Argument Preparation

a → Xn 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.D[0],Xn
+

Argument Preparation

a → Xn 

Results

Vd.1D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.D[0],Xn
+

Argument Preparation

a → Xn 

Results

Vd.1D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.D[0],Xn
+

Argument Preparation

a → Xn 

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.D[0],Xn
+

Argument Preparation

a → Xn 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.D[0],Xn
+

Argument Preparation

a → Xn 

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.D[0],Xn
+

Argument Preparation

a → Xn 

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.D[0],Xn
+

Argument Preparation

a → Xn 

Results

Vd.1D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.8B,rn
+

Argument Preparation

value → rn 

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+
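
The duplication loop above is what a broadcast intrinsic boils down to: one scalar value copied into every lane. As a rough cross-check, the snippet below uses what are assumed to be the matching `core::arch::aarch64` intrinsics, `vdup_n_u8` to broadcast and `vget_lane_u8` to read a lane back; treat the exact names and signatures as an assumption rather than something this patch defines, and note that the demonstration only runs on AArch64.

```rust
// AArch64-only demonstration; on other targets it compiles to a no-op.
// vdup_n_u8 / vget_lane_u8 are assumed to be the matching NEON intrinsics.
#[cfg(target_arch = "aarch64")]
fn demo() {
    use core::arch::aarch64::{vdup_n_u8, vget_lane_u8};
    // Safety: these intrinsics require the `neon` target feature, which is
    // part of the default AArch64 feature set.
    unsafe {
        let v = vdup_n_u8(0x2a);                // DUP Vd.8B, Wn
        assert_eq!(vget_lane_u8::<3>(v), 0x2a); // every lane holds the value
    }
}

#[cfg(not(target_arch = "aarch64"))]
fn demo() {}

fn main() {
    demo();
}
```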

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.16B,rn
+

Argument Preparation

value → rn 

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.4H,rn
+

Argument Preparation

value → rn 

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.8H,rn
+

Argument Preparation

value → rn 

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.2S,rn
+

Argument Preparation

value → rn 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.4S,rn
+

Argument Preparation

value → rn 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Dd.D[0],xn
+

Argument Preparation

value → rn 

Results

Vd.1D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.2D,rn
+

Argument Preparation

value → rn 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.8B,rn
+

Argument Preparation

value → rn 

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.16B,rn
+

Argument Preparation

value → rn 

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.4H,rn
+

Argument Preparation

value → rn 

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.8H,rn
+

Argument Preparation

value → rn 

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.2S,rn
+

Argument Preparation

value → rn 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.4S,rn
+

Argument Preparation

value → rn 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Dd.D[0],xn
+

Argument Preparation

value → rn 

Results

Vd.1D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.2D,rn
+

Argument Preparation

value → rn 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Dd.D[0],xn
+

Argument Preparation

value → rn 

Results

Vd.1D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.2D,rn
+

Argument Preparation

value → rn 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.2S,rn
+

Argument Preparation

value → rn 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.4S,rn
+

Argument Preparation

value → rn 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.8B,rn
+

Argument Preparation

value → rn 

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.16B,rn
+

Argument Preparation

value → rn 

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.4H,rn
+

Argument Preparation

value → rn 

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.8H,rn
+

Argument Preparation

value → rn 

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Dd.D[0],xn
+

Argument Preparation

value → rn 

Results

Vd.1D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.2D,rn
+

Argument Preparation

value → rn 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.8B,rn
+

Argument Preparation

value → rn 

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.16B,rn
+

Argument Preparation

value → rn 

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.4H,rn
+

Argument Preparation

value → rn 

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.8H,rn
+

Argument Preparation

value → rn 

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.2S,rn
+

Argument Preparation

value → rn 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.4S,rn
+

Argument Preparation

value → rn 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.1D,rn
+

Argument Preparation

value → rn 

Results

Vd.1D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.2D,rn
+

Argument Preparation

value → rn 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.8B,rn
+

Argument Preparation

value → rn 

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.16B,rn
+

Argument Preparation

value → rn 

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.4H,rn
+

Argument Preparation

value → rn 

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.8H,rn
+

Argument Preparation

value → rn 

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.2S,rn
+

Argument Preparation

value → rn 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.4S,rn
+

Argument Preparation

value → rn 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.1D,rn
+

Argument Preparation

value → rn 

Results

Vd.1D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.2D,rn
+

Argument Preparation

value → rn 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.2S,rn
+

Argument Preparation

value → rn 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.4S,rn
+

Argument Preparation

value → rn 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.8B,rn
+

Argument Preparation

value → rn 

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.16B,rn
+

Argument Preparation

value → rn 

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.4H,rn
+

Argument Preparation

value → rn 

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

A64 Instruction

DUP Vd.8H,rn

Argument Preparation

value → rn

Results

Vd.8H → result

Operation

CheckFPAdvSIMDEnabled64();
bits(esize) element = X[n];
bits(datasize) result;

for e = 0 to elements-1
    Elem[result, e, esize] = element;
V[d] = result;

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

A64 Instruction

DUP Vd.1D,rn

Argument Preparation

value → rn

Results

Vd.1D → result

Operation

CheckFPAdvSIMDEnabled64();
bits(esize) element = X[n];
bits(datasize) result;

for e = 0 to elements-1
    Elem[result, e, esize] = element;
V[d] = result;

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

A64 Instruction

DUP Vd.2D,rn

Argument Preparation

value → rn

Results

Vd.2D → result

Operation

CheckFPAdvSIMDEnabled64();
bits(esize) element = X[n];
bits(datasize) result;

for e = 0 to elements-1
    Elem[result, e, esize] = element;
V[d] = result;

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

A64 Instruction

DUP Vd.8B,Vn.B[lane]

Argument Preparation

vec → Vn.8B
0 <= lane <= 7

Results

Vd.8B → result

Operation

CheckFPAdvSIMDEnabled64();
bits(esize) element = X[n];
bits(datasize) result;

for e = 0 to elements-1
    Elem[result, e, esize] = element;
V[d] = result;

Supported architectures

v7/A32/A64
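
The lane form above selects one source element and broadcasts it. A plain-Rust model of that lane-broadcast behaviour for the 8-byte case (a model of the operation, not the intrinsic itself):

fn dup_lane_u8x8(vec: [u8; 8], lane: usize) -> [u8; 8] {
    assert!(lane <= 7); // matches the 0 <= lane <= 7 constraint above
    // Elem[result, e, esize] = element for every e: broadcast vec[lane].
    [vec[lane]; 8]
}

For example, dup_lane_u8x8([1, 2, 3, 4, 5, 6, 7, 8], 2) returns [3; 8].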

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

A64 Instruction

DUP Vd.16B,Vn.B[lane]

Argument Preparation

vec → Vn.8B
0 <= lane <= 7

Results

Vd.16B → result

Operation

CheckFPAdvSIMDEnabled64();
bits(esize) element = X[n];
bits(datasize) result;

for e = 0 to elements-1
    Elem[result, e, esize] = element;
V[d] = result;

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.4H,Vn.H[lane]
+

Argument Preparation

vec → Vn.4H 
+0 << lane << 3

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.8H,Vn.H[lane]
+

Argument Preparation

vec → Vn.4H 
+0 << lane << 3

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.2S,Vn.S[lane]
+

Argument Preparation

vec → Vn.2S 
+0 << lane << 1

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.4S,Vn.S[lane]
+

Argument Preparation

vec → Vn.2S 
+0 << lane << 1

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Dd,Vn.D[lane]
+

Argument Preparation

vec → Vn.1D 
+0 << lane << 0

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.2D,Vn.D[lane]
+

Argument Preparation

vec → Vn.1D 
+0 << lane << 0

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.8B,Vn.B[lane]
+

Argument Preparation

vec → Vn.8B 
+0 << lane << 7

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.16B,Vn.B[lane]
+

Argument Preparation

vec → Vn.8B 
+0 << lane << 7

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.4H,Vn.H[lane]
+

Argument Preparation

vec → Vn.4H 
+0 << lane << 3

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.8H,Vn.H[lane]
+

Argument Preparation

vec → Vn.4H 
+0 << lane << 3

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.2S,Vn.S[lane]
+

Argument Preparation

vec → Vn.2S 
+0 << lane << 1

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.4S,Vn.S[lane]
+

Argument Preparation

vec → Vn.2S 
+0 << lane << 1

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Dd,Vn.D[lane]
+

Argument Preparation

vec → Vn.1D 
+0 << lane << 0

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.2D,Vn.D[lane]
+

Argument Preparation

vec → Vn.1D 
+0 << lane << 0

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Dd,Vn.D[lane]
+

Argument Preparation

vec → Vn.1D 
+0 << lane << 0

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.2D,Vn.D[lane]
+

Argument Preparation

vec → Vn.1D 
+0 << lane << 0

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.2S,Vn.S[lane]
+

Argument Preparation

vec → Vn.2S 
+0 << lane << 1

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.4S,Vn.S[lane]
+

Argument Preparation

vec → Vn.2S 
+0 << lane << 1

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.8B,Vn.B[lane]
+

Argument Preparation

vec → Vn.8B 
+0 << lane << 7

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.16B,Vn.B[lane]
+

Argument Preparation

vec → Vn.8B 
+0 << lane << 7

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.4H,Vn.H[lane]
+

Argument Preparation

vec → Vn.4H 
+0 << lane << 3

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.8H,Vn.H[lane]
+

Argument Preparation

vec → Vn.4H 
+0 << lane << 3

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Dd,Vn.D[lane]
+

Argument Preparation

vec → Vn.1D 
+0 << lane << 0

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.2D,Vn.D[lane]
+

Argument Preparation

vec → Vn.1D 
+0 << lane << 0

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

A64 Instruction

DUP Vd.8B,Vn.B[lane]

Argument Preparation

vec → Vn.16B
0 <= lane <= 15

Results

Vd.8B → result

Operation

CheckFPAdvSIMDEnabled64();
bits(esize) element = X[n];
bits(datasize) result;

for e = 0 to elements-1
    Elem[result, e, esize] = element;
V[d] = result;

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.16B,Vn.B[lane]
+

Argument Preparation

vec → Vn.16B 
+0 << lane << 15

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.4H,Vn.H[lane]
+

Argument Preparation

vec → Vn.8H 
+0 << lane << 7

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.8H,Vn.H[lane]
+

Argument Preparation

vec → Vn.8H 
+0 << lane << 7

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.2S,Vn.S[lane]
+

Argument Preparation

vec → Vn.4S 
+0 << lane << 3

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.4S,Vn.S[lane]
+

Argument Preparation

vec → Vn.4S 
+0 << lane << 3

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Dd,Vn.D[lane]
+

Argument Preparation

vec → Vn.2D 
+0 << lane << 1

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.2D,Vn.D[lane]
+

Argument Preparation

vec → Vn.2D 
+0 << lane << 1

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.8B,Vn.B[lane]
+

Argument Preparation

vec → Vn.16B 
+0 << lane << 15

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.16B,Vn.B[lane]
+

Argument Preparation

vec → Vn.16B 
+0 << lane << 15

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.4H,Vn.H[lane]
+

Argument Preparation

vec → Vn.8H 
+0 << lane << 7

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.8H,Vn.H[lane]
+

Argument Preparation

vec → Vn.8H 
+0 << lane << 7

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.2S,Vn.S[lane]
+

Argument Preparation

vec → Vn.4S 
+0 << lane << 3

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.4S,Vn.S[lane]
+

Argument Preparation

vec → Vn.4S 
+0 << lane << 3

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Dd,Vn.D[lane]
+

Argument Preparation

vec → Vn.2D 
+0 << lane << 1

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.2D,Vn.D[lane]
+

Argument Preparation

vec → Vn.2D 
+0 << lane << 1

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Dd,Vn.D[lane]
+

Argument Preparation

vec → Vn.2D 
+0 << lane << 1

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.2D,Vn.D[lane]
+

Argument Preparation

vec → Vn.2D 
+0 << lane << 1

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.2S,Vn.S[lane]
+

Argument Preparation

vec → Vn.4S 
+0 << lane << 3

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.4S,Vn.S[lane]
+

Argument Preparation

vec → Vn.4S 
+0 << lane << 3

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.8B,Vn.B[lane]
+

Argument Preparation

vec → Vn.16B 
+0 << lane << 15

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.16B,Vn.B[lane]
+

Argument Preparation

vec → Vn.16B 
+0 << lane << 15

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.4H,Vn.H[lane]
+

Argument Preparation

vec → Vn.8H 
+0 << lane << 7

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.8H,Vn.H[lane]
+

Argument Preparation

vec → Vn.8H 
+0 << lane << 7

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Dd,Vn.D[lane]
+

Argument Preparation

vec → Vn.2D 
+0 << lane << 1

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.2D,Vn.D[lane]
+

Argument Preparation

vec → Vn.2D 
+0 << lane << 1

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

A64 Instruction

DUP Vd.1D,Vn.D[0]
INS Vd.D[1],Vm.D[0]

Argument Preparation

low → Vn.8B
high → Vm.8B

Results

Vd.16B → result

Operation

CheckFPAdvSIMDEnabled64();
bits(esize) element = X[n];
bits(128) result;

result = V[d];
Elem[result, index, esize] = element;
V[d] = result;

Supported architectures

v7/A32/A64
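
For illustration, a minimal Rust sketch assuming this DUP-then-INS pair corresponds to a combine-two-halves intrinsic such as core::arch::aarch64::vcombine_u8 (an assumed mapping, not stated by this entry):

#[cfg(target_arch = "aarch64")]
unsafe fn combine_halves(low: [u8; 8], high: [u8; 8]) -> [u8; 16] {
    use core::arch::aarch64::{uint8x8_t, vcombine_u8};
    let lo: uint8x8_t = core::mem::transmute(low);
    let hi: uint8x8_t = core::mem::transmute(high);
    // DUP Vd.1D, Vn.D[0] seeds the low 64 bits of the destination;
    // INS Vd.D[1], Vm.D[0] then inserts the other operand into the high 64 bits.
    core::mem::transmute(vcombine_u8(lo, hi))
}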

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

DUP Vd.1D,Vn.D[0]
+INS Vd.D[1],Vm.D[0]
+

Argument Preparation

low → Vn.4H 
+high → Vm.4H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

DUP Vd.1D,Vn.D[0]
+INS Vd.D[1],Vm.D[0]
+

Argument Preparation

low → Vn.2S 
+high → Vm.2S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

DUP Vd.1D,Vn.D[0]
+INS Vd.D[1],Vm.D[0]
+

Argument Preparation

low → Vn.1D 
+high → Vm.1D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

DUP Vd.1D,Vn.D[0]
+INS Vd.D[1],Vm.D[0]
+

Argument Preparation

low → Vn.8B 
+high → Vm.8B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

DUP Vd.1D,Vn.D[0]
+INS Vd.D[1],Vm.D[0]
+

Argument Preparation

low → Vn.4H 
+high → Vm.4H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

DUP Vd.1D,Vn.D[0]
+INS Vd.D[1],Vm.D[0]
+

Argument Preparation

low → Vn.2S 
+high → Vm.2S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

DUP Vd.1D,Vn.D[0]
+INS Vd.D[1],Vm.D[0]
+

Argument Preparation

low → Vn.1D 
+high → Vm.1D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

DUP Vd.1D,Vn.D[0]
+INS Vd.D[1],Vm.D[0]
+

Argument Preparation

low → Vn.1D 
+high → Vm.1D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

DUP Vd.1D,Vn.D[0]
+INS Vd.D[1],Vm.D[0]
+

Argument Preparation

low → Vn.4H 
+high → Vm.4H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

DUP Vd.1D,Vn.D[0]
+INS Vd.D[1],Vm.D[0]
+

Argument Preparation

low → Vn.2S 
+high → Vm.2S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

DUP Vd.1D,Vn.D[0]
+INS Vd.D[1],Vm.D[0]
+

Argument Preparation

low → Vn.8B 
+high → Vm.8B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

DUP Vd.1D,Vn.D[0]
+INS Vd.D[1],Vm.D[0]
+

Argument Preparation

low → Vn.4H 
+high → Vm.4H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

DUP Vd.1D,Vn.D[0]
+INS Vd.D[1],Vm.D[0]
+

Argument Preparation

low → Vn.1D 
+high → Vm.1D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

A64 Instruction

DUP Vd.1D,Vn.D[1]

Argument Preparation

a → Vn.16B

Results

Vd.8B → result

Operation

CheckFPAdvSIMDEnabled64();
bits(esize) element = X[n];
bits(datasize) result;

for e = 0 to elements-1
    Elem[result, e, esize] = element;
V[d] = result;

Supported architectures

v7/A32/A64
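
For illustration, a sketch assuming this D[1] form corresponds to a take-the-upper-half intrinsic such as core::arch::aarch64::vget_high_u8 (an assumed mapping):

#[cfg(target_arch = "aarch64")]
unsafe fn upper_half(a: [u8; 16]) -> [u8; 8] {
    use core::arch::aarch64::{uint8x16_t, vget_high_u8};
    let v: uint8x16_t = core::mem::transmute(a);
    // DUP Vd.1D, Vn.D[1]: copy the upper 64 bits of the source vector.
    core::mem::transmute(vget_high_u8(v))
}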

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.1D,Vn.D[1]
+

Argument Preparation

a → Vn.8H 

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.1D,Vn.D[1]
+

Argument Preparation

a → Vn.4S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.1D,Vn.D[1]
+

Argument Preparation

a → Vn.2D 

Results

Vd.1D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.1D,Vn.D[1]
+

Argument Preparation

a → Vn.16B 

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.1D,Vn.D[1]
+

Argument Preparation

a → Vn.8H 

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.1D,Vn.D[1]
+

Argument Preparation

a → Vn.4S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.1D,Vn.D[1]
+

Argument Preparation

a → Vn.2D 

Results

Vd.1D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.1D,Vn.D[1]
+

Argument Preparation

a → Vn.2D 

Results

Vd.1D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.1D,Vn.D[1]
+

Argument Preparation

a → Vn.8H 

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.1D,Vn.D[1]
+

Argument Preparation

a → Vn.4S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.1D,Vn.D[1]
+

Argument Preparation

a → Vn.16B 

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.1D,Vn.D[1]
+

Argument Preparation

a → Vn.8H 

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.1D,Vn.D[1]
+

Argument Preparation

a → Vn.2D 

Results

Vd.1D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

A64 Instruction

DUP Vd.1D,Vn.D[0]

Argument Preparation

a → Vn.16B

Results

Vd.8B → result

Operation

CheckFPAdvSIMDEnabled64();
bits(esize) element = X[n];
bits(datasize) result;

for e = 0 to elements-1
    Elem[result, e, esize] = element;
V[d] = result;

Supported architectures

v7/A32/A64
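
The D[0] form is the mirror image of the upper-half sketch earlier: it keeps the lower 64 bits, as a take-the-lower-half intrinsic such as core::arch::aarch64::vget_low_u8 would (again an assumed mapping):

#[cfg(target_arch = "aarch64")]
unsafe fn lower_half(a: [u8; 16]) -> [u8; 8] {
    use core::arch::aarch64::{uint8x16_t, vget_low_u8};
    let v: uint8x16_t = core::mem::transmute(a);
    // DUP Vd.1D, Vn.D[0]: copy the lower 64 bits of the source vector.
    core::mem::transmute(vget_low_u8(v))
}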

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.1D,Vn.D[0]
+

Argument Preparation

a → Vn.8H 

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.1D,Vn.D[0]
+

Argument Preparation

a → Vn.4S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.1D,Vn.D[0]
+

Argument Preparation

a → Vn.2D 

Results

Vd.1D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.1D,Vn.D[0]
+

Argument Preparation

a → Vn.16B 

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.1D,Vn.D[0]
+

Argument Preparation

a → Vn.8H 

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.1D,Vn.D[0]
+

Argument Preparation

a → Vn.4S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.1D,Vn.D[0]
+

Argument Preparation

a → Vn.2D 

Results

Vd.1D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.1D,Vn.D[0]
+

Argument Preparation

a → Vn.2D 

Results

Vd.1D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.1D,Vn.D[0]
+

Argument Preparation

a → Vn.8H 

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.1D,Vn.D[0]
+

Argument Preparation

a → Vn.4S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.1D,Vn.D[0]
+

Argument Preparation

a → Vn.16B 

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.1D,Vn.D[0]
+

Argument Preparation

a → Vn.8H 

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Vd.1D,Vn.D[0]
+

Argument Preparation

a → Vn.2D 

Results

Vd.1D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

A64 Instruction

DUP Bd,Vn.B[lane]

Argument Preparation

vec → Vn.8B
0 <= lane <= 7

Results

Bd → result

Operation

CheckFPAdvSIMDEnabled64();
bits(esize) element = X[n];
bits(datasize) result;

for e = 0 to elements-1
    Elem[result, e, esize] = element;
V[d] = result;

Supported architectures

A64
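
For the scalar destinations (Bd, Hd, Sd, Dd), the operation reduces to picking one lane. A plain-Rust model for the byte case (a model only, not a specific intrinsic):

fn dup_b_lane(vec: [u8; 8], lane: usize) -> u8 {
    assert!(lane <= 7); // matches the 0 <= lane <= 7 constraint above
    // The selected byte becomes the scalar result; on hardware, writing a
    // scalar SIMD&FP register clears the remaining bits of the destination.
    vec[lane]
}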

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Hd,Vn.H[lane]
+

Argument Preparation

vec → Vn.4H 
+0 << lane << 3

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Sd,Vn.S[lane]
+

Argument Preparation

vec → Vn.2S 
+0 << lane << 1

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Dd,Vn.D[lane]
+

Argument Preparation

vec → Vn.1D 
+0 << lane << 0

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Bd,Vn.B[lane]
+

Argument Preparation

vec → Vn.8B 
+0 <= lane <= 7

Results

Bd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Hd,Vn.H[lane]
+

Argument Preparation

vec → Vn.4H 
+0 <= lane <= 3

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Sd,Vn.S[lane]
+

Argument Preparation

vec → Vn.2S 
+0 <= lane <= 1

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Dd,Vn.D[lane]
+

Argument Preparation

vec → Vn.1D 
+0 <= lane <= 0

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Sd,Vn.S[lane]
+

Argument Preparation

vec → Vn.2S 
+0 <= lane <= 1

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Dd,Vn.D[lane]
+

Argument Preparation

vec → Vn.1D 
+0 <= lane <= 0

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Bd,Vn.B[lane]
+

Argument Preparation

vec → Vn.8B 
+0 <= lane <= 7

Results

Bd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Hd,Vn.H[lane]
+

Argument Preparation

vec → Vn.4H 
+0 <= lane <= 3

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Bd,Vn.B[lane]
+

Argument Preparation

vec → Vn.16B 
+0 <= lane <= 15

Results

Bd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+
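The 128-bit source variants only change the lane bound: a .16B source allows 0 <= lane <= 15, a .8H source 0 <= lane <= 7, and so on. Assuming the q-suffixed lane intrinsics exist as in current stdarch (again an assumption, not part of this diff), the same sketch for the wider source is:

#[cfg(target_arch = "aarch64")]
fn last_byte(v: core::arch::aarch64::uint8x16_t) -> u8 {
    // Sketch only: intrinsic name and const-generic lane form assumed.
    use core::arch::aarch64::vgetq_lane_u8;
    // For a .16B source the valid range is 0 <= LANE <= 15.
    unsafe { vgetq_lane_u8::<15>(v) }
}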

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Hd,Vn.H[lane]
+

Argument Preparation

vec → Vn.8H 
+0 <= lane <= 7

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Sd,Vn.S[lane]
+

Argument Preparation

vec → Vn.4S 
+0 <= lane <= 3

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Dd,Vn.D[lane]
+

Argument Preparation

vec → Vn.2D 
+0 <= lane <= 1

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Bd,Vn.B[lane]
+

Argument Preparation

vec → Vn.16B 
+0 <= lane <= 15

Results

Bd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Hd,Vn.H[lane]
+

Argument Preparation

vec → Vn.8H 
+0 <= lane <= 7

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Sd,Vn.S[lane]
+

Argument Preparation

vec → Vn.4S 
+0 <= lane <= 3

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Dd,Vn.D[lane]
+

Argument Preparation

vec → Vn.2D 
+0 <= lane <= 1

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Sd,Vn.S[lane]
+

Argument Preparation

vec → Vn.4S 
+0 <= lane <= 3

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Dd,Vn.D[lane]
+

Argument Preparation

vec → Vn.2D 
+0 <= lane <= 1

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Bd,Vn.B[lane]
+

Argument Preparation

vec → Vn.16B 
+0 <= lane <= 15

Results

Bd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Hd,Vn.H[lane]
+

Argument Preparation

vec → Vn.8H 
+0 <= lane <= 7

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.8B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.8B → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+
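Ignoring the writeback and replicate branches, the common path of the LD1 pseudocode above is a plain contiguous load from the address in Xn. A minimal Rust sketch of how that surfaces, assuming the vld1_u8/vld1q_u8 intrinsics from core::arch::aarch64 (the "ptr → Xn" preparation becomes a raw pointer argument; not part of this patch):

#[cfg(target_arch = "aarch64")]
fn load_vectors(bytes: &[u8; 24]) -> (core::arch::aarch64::uint8x16_t, core::arch::aarch64::uint8x8_t) {
    // Sketch only: intrinsic names assumed from core::arch::aarch64.
    use core::arch::aarch64::{vld1_u8, vld1q_u8};
    unsafe {
        // The pointers must be valid for 16 and 8 readable bytes respectively.
        let q = vld1q_u8(bytes.as_ptr());
        let d = vld1_u8(bytes.as_ptr().add(16));
        (q, d)
    }
}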

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.16B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.16B → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.4H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.4H → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.8H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.8H → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.2S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.2S → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.4S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.4S → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.1D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.1D → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.2D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.2D → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.8B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.8B → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.16B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.16B → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.4H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.4H → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.8H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.8H → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.2S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.2S → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.4S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.4S → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.1D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.1D → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.2D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.2D → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.1D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.1D → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.2D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.2D → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.4H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.4H → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.8H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.8H → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.2S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.2S → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.4S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.4S → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.8B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.8B → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.16B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.16B → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.4H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.4H → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.8H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.8H → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.1D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.1D → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64
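
A minimal sketch of this A64-only 1D form, assuming it corresponds to a 64-bit element load such as vld1_f64 (the element type is inferred from the A64-only restriction, not stated above):

#[cfg(target_arch = "aarch64")]
use core::arch::aarch64::{float64x1_t, vld1_f64};

#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn load_one_f64(ptr: *const f64) -> float64x1_t {
    // LD1 {Vt.1D},[Xn]: loads a single 64-bit element into the low half of a V register.
    // The f64 element type here is an assumption.
    vld1_f64(ptr)
}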

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.2D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.2D → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64
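
The 128-bit counterpart can be sketched the same way; vld1q_f64 is again an assumed mapping for this A64-only 2D form:

#[cfg(target_arch = "aarch64")]
use core::arch::aarch64::{float64x2_t, vld1q_f64};

#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn load_two_f64(ptr: *const f64) -> float64x2_t {
    // LD1 {Vt.2D},[Xn]: loads two contiguous 64-bit elements into one 128-bit vector.
    // The f64 element type here is an assumption.
    vld1q_f64(ptr)
}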

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.b}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src → Vt.8B
+0 <= lane <= 7

Results

Vt.8B → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
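
To make the lane-insert behaviour concrete, here is a hedged Rust sketch using vld1_lane_u8 from core::arch::aarch64. The intrinsic name and the const-generic lane parameter follow the current stdarch API, and lane 5 is an arbitrary choice within 0..=7; none of this is stated in the entry above.

#[cfg(target_arch = "aarch64")]
use core::arch::aarch64::{uint8x8_t, vld1_lane_u8};

#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn insert_byte_into_lane5(ptr: *const u8, src: uint8x8_t) -> uint8x8_t {
    // Loads one byte from `ptr` into lane 5 of `src`; the other seven lanes
    // are returned unchanged. The mapping to vld1_lane_u8 is assumed.
    vld1_lane_u8::<5>(ptr, src)
}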

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.b}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src → Vt.16B
+0 <= lane <= 15

Results

Vt.16B → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.H}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src → Vt.4H
+0 <= lane <= 3

Results

Vt.4H → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.H}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src → Vt.8H
+0 <= lane <= 7

Results

Vt.8H → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
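
For the 128-bit 8H form, a sketch under the same assumptions (vld1q_lane_u16 as the mapped intrinsic, lane 3 chosen arbitrarily from 0..=7):

#[cfg(target_arch = "aarch64")]
use core::arch::aarch64::{uint16x8_t, vld1q_lane_u16};

#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn insert_u16_into_lane3(ptr: *const u16, src: uint16x8_t) -> uint16x8_t {
    // Loads one 16-bit element into lane 3 of `src`, leaving lanes 0..=2 and 4..=7 untouched.
    // The mapping to vld1q_lane_u16 is assumed.
    vld1q_lane_u16::<3>(ptr, src)
}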

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.S}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src → Vt.2S
+0 <= lane <= 1

Results

Vt.2S → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.S}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src → Vt.4S
+0 <= lane <= 3

Results

Vt.4S → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
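
The 32-bit lane variant follows the same pattern; vld1q_lane_u32 is one plausible (assumed) mapping for this 4S form, with lane 2 chosen arbitrarily:

#[cfg(target_arch = "aarch64")]
use core::arch::aarch64::{uint32x4_t, vld1q_lane_u32};

#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn insert_u32_into_lane2(ptr: *const u32, src: uint32x4_t) -> uint32x4_t {
    // Loads one 32-bit element into lane 2 of `src`; lanes 0, 1 and 3 pass through unchanged.
    // The mapping to vld1q_lane_u32 is assumed.
    vld1q_lane_u32::<2>(ptr, src)
}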

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.D}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src → Vt.1D
+0 <= lane <= 0

Results

Vt.1D → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.D}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src → Vt.2D
+0 <= lane <= 1

Results

Vt.2D → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.B}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src → Vt.8B
+0 <= lane <= 7

Results

Vt.8B → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.B}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src → Vt.16B
+0 <= lane <= 15

Results

Vt.16B → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.H}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src → Vt.4H
+0 <= lane <= 3

Results

Vt.4H → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.H}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src → Vt.8H
+0 <= lane <= 7

Results

Vt.8H → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.S}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src → Vt.2S
+0 <= lane <= 1

Results

Vt.2S → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.S}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src → Vt.4S
+0 <= lane <= 3

Results

Vt.4S → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.D}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src → Vt.1D
+0 <= lane <= 0

Results

Vt.1D → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.D}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src → Vt.2D
+0 <= lane <= 1

Results

Vt.2D → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.D}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src → Vt.1D
+0 <= lane <= 0

Results

Vt.1D → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.D}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src → Vt.2D
+0 <= lane <= 1

Results

Vt.2D → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A32/A64
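
A sketch of the 64-bit lane insert, assuming a mapping to vld1q_lane_u64 (for a 2D vector, lane 1 is the only non-zero choice):

#[cfg(target_arch = "aarch64")]
use core::arch::aarch64::{uint64x2_t, vld1q_lane_u64};

#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn insert_u64_into_lane1(ptr: *const u64, src: uint64x2_t) -> uint64x2_t {
    // Loads one 64-bit element into the high lane of `src`; the low lane is preserved.
    // The mapping to vld1q_lane_u64 is assumed.
    vld1q_lane_u64::<1>(ptr, src)
}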

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.H}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src → Vt.4H
+0 <= lane <= 3

Results

Vt.4H → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.H}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src → Vt.8H
+0 <= lane <= 7

Results

Vt.8H → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.S}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src → Vt.2S
+0 <= lane <= 1

Results

Vt.2S → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.S}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src → Vt.4S
+0 <= lane <= 3

Results

Vt.4S → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.B}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src → Vt.8B
+0 <= lane <= 7

Results

Vt.8B → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.B}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src → Vt.16B
+0 <= lane <= 15

Results

Vt.16B → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.H}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src → Vt.4H
+0 <= lane <= 3

Results

Vt.4H → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.H}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src → Vt.8H
+0 <= lane <= 7

Results

Vt.8H → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.D}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src → Vt.1D
+0 <= lane <= 0

Results

Vt.1D → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.D}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src → Vt.2D
+0 <= lane <= 1

Results

Vt.2D → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64
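
For this A64-only 2D lane form the element type is presumably f64; under that assumption, vld1q_lane_f64 gives the same lane-insert behaviour:

#[cfg(target_arch = "aarch64")]
use core::arch::aarch64::{float64x2_t, vld1q_lane_f64};

#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn insert_f64_into_lane0(ptr: *const f64, src: float64x2_t) -> float64x2_t {
    // Loads one f64 into lane 0 of `src`; lane 1 is preserved.
    // Both the f64 element type and the mapping to vld1q_lane_f64 are assumptions.
    vld1q_lane_f64::<0>(ptr, src)
}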

Description

Load one single-element structure and Replicate to all lanes (of one register). This instruction loads a single-element structure from memory and replicates the structure to all the lanes of the SIMD&FP register.

+

A64 Instruction

LD1R {Vt.8B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.8B → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
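
The replicate-to-all-lanes behaviour can be sketched with vld1_dup_u8, an assumed mapping for this LD1R 8B form:

#[cfg(target_arch = "aarch64")]
use core::arch::aarch64::{uint8x8_t, vld1_dup_u8};

#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn splat_byte(ptr: *const u8) -> uint8x8_t {
    // LD1R {Vt.8B},[Xn]: reads a single byte and replicates it into all eight lanes.
    // The mapping to vld1_dup_u8 is assumed.
    vld1_dup_u8(ptr)
}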

Description

Load one single-element structure and Replicate to all lanes (of one register). This instruction loads a single-element structure from memory and replicates the structure to all the lanes of the SIMD&FP register.

+

A64 Instruction

LD1R {Vt.16B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.16B → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure and Replicate to all lanes (of one register). This instruction loads a single-element structure from memory and replicates the structure to all the lanes of the SIMD&FP register.

+

A64 Instruction

LD1R {Vt.4H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.4H → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure and Replicate to all lanes (of one register). This instruction loads a single-element structure from memory and replicates the structure to all the lanes of the SIMD&FP register.

+

A64 Instruction

LD1R {Vt.8H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.8H → result
+

Operation

+if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure and Replicate to all lanes (of one register). This instruction loads a single-element structure from memory and replicates the structure to all the lanes of the SIMD&FP register.

+

A64 Instruction

LD1R {Vt.2S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.2S → result
+

Operation

+if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure and Replicate to all lanes (of one register). This instruction loads a single-element structure from memory and replicates the structure to all the lanes of the SIMD&FP register.

+

A64 Instruction

LD1R {Vt.4S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.4S → result
+

Operation

+if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.1D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.1D → result
+

Operation

+if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+
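
For the LD1 {Vt.1D},[Xn] form above, replicate = FALSE, so the second branch of the pseudocode runs: the register is read into rval, the loaded element is inserted into lane index, and the remaining bytes of rval are written back unchanged, which is exactly the "without affecting the other bits" behaviour in the description. A small plain-Rust sketch of that MemOp_LOAD path follows; the helper name and the [u8; 16] register model are illustrative assumptions, not core_arch API.

// Model of the MemOp_LOAD, non-replicate branch: insert one element into
// lane `index` of the register, leaving the other bytes untouched.
fn ld1_lane_model(mem: &[u8], addr: usize, mut rval: [u8; 16], index: usize, ebytes: usize) -> [u8; 16] {
    // Elem[rval, index, esize] = Mem[address + offs, ebytes, AccType_VEC]
    rval[index * ebytes..(index + 1) * ebytes].copy_from_slice(&mem[addr..addr + ebytes]);
    rval // V[t] = rval  (only the selected lane changed)
}

For the .1D arrangement, index = 0 and ebytes = 8, so the single 64-bit lane is replaced by the loaded doubleword.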

Supported architectures

v7/A32/A64

Description

Load one single-element structure and Replicate to all lanes (of one register). This instruction loads a single-element structure from memory and replicates the structure to all the lanes of the SIMD&FP register.

+

A64 Instruction

LD1R {Vt.2D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.2D → result
+

Operation

+if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure and Replicate to all lanes (of one register). This instruction loads a single-element structure from memory and replicates the structure to all the lanes of the SIMD&FP register.

+

A64 Instruction

LD1R {Vt.8B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.8B → result
+

Operation

+if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure and Replicate to all lanes (of one register). This instruction loads a single-element structure from memory and replicates the structure to all the lanes of the SIMD&FP register.

+

A64 Instruction

LD1R {Vt.16B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.16B → result
+

Operation

+if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure and Replicate to all lanes (of one register). This instruction loads a single-element structure from memory and replicates the structure to all the lanes of the SIMD&FP register.

+

A64 Instruction

LD1R {Vt.4H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.4H → result
+

Operation

+if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure and Replicate to all lanes (of one register). This instruction loads a single-element structure from memory and replicates the structure to all the lanes of the SIMD&FP register.

+

A64 Instruction

LD1R {Vt.8H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.8H → result
+

Operation

+if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure and Replicate to all lanes (of one register). This instruction loads a single-element structure from memory and replicates the structure to all the lanes of the SIMD&FP register.

+

A64 Instruction

LD1R {Vt.2S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.2S → result
+

Operation

+if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure and Replicate to all lanes (of one register). This instruction loads a single-element structure from memory and replicates the structure to all the lanes of the SIMD&FP register.

+

A64 Instruction

LD1R {Vt.4S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.4S → result
+

Operation

+if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.1D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.1D → result
+

Operation

+if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure and Replicate to all lanes (of one register). This instruction loads a single-element structure from memory and replicates the structure to all the lanes of the SIMD&FP register.

+

A64 Instruction

LD1R {Vt.2D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.2D → result
+

Operation

+if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.1D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.1D → result
+

Operation

+if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A32/A64

Description

Load one single-element structure and Replicate to all lanes (of one register). This instruction loads a single-element structure from memory and replicates the structure to all the lanes of the SIMD&FP register.

+

A64 Instruction

LD1R {Vt.2D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.2D → result
+

Operation

+if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A32/A64

Description

Load one single-element structure and Replicate to all lanes (of one register). This instruction loads a single-element structure from memory and replicates the structure to all the lanes of the SIMD&FP register.

+

A64 Instruction

LD1R {Vt.4H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.4H → result
+

Operation

+if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure and Replicate to all lanes (of one register). This instruction loads a single-element structure from memory and replicates the structure to all the lanes of the SIMD&FP register.

+

A64 Instruction

LD1R {Vt.8H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.8H → result
+

Operation

+if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure and Replicate to all lanes (of one register). This instruction loads a single-element structure from memory and replicates the structure to all the lanes of the SIMD&FP register.

+

A64 Instruction

LD1R {Vt.2S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.2S → result
+

Operation

+if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure and Replicate to all lanes (of one register). This instruction loads a single-element structure from memory and replicates the structure to all the lanes of the SIMD&FP register.

+

A64 Instruction

LD1R {Vt.4S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.4S → result
+

Operation

+if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure and Replicate to all lanes (of one register). This instruction loads a single-element structure from memory and replicates the structure to all the lanes of the SIMD&FP register.

+

A64 Instruction

LD1R {Vt.8B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.8B → result
+

Operation

+if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure and Replicate to all lanes (of one register). This instruction loads a single-element structure from memory and replicates the structure to all the lanes of the SIMD&FP register.

+

A64 Instruction

LD1R {Vt.16B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.16B → result
+

Operation

+if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure and Replicate to all lanes (of one register). This instruction loads a single-element structure from memory and replicates the structure to all the lanes of the SIMD&FP register.

+

A64 Instruction

LD1R {Vt.4H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.4H → result
+

Operation

+if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure and Replicate to all lanes (of one register). This instruction loads a single-element structure from memory and replicates the structure to all the lanes of the SIMD&FP register.

+

A64 Instruction

LD1R {Vt.8H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.8H → result
+

Operation

+if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.1D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.1D → result
+

Operation

+if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load one single-element structure and Replicate to all lanes (of one register). This instruction loads a single-element structure from memory and replicates the structure to all the lanes of the SIMD&FP register.

+

A64 Instruction

LD1R {Vt.2D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt.2D → result
+

Operation

+if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.8B},[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.8B

Results

void → result
+

Operation

+if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+
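
The ST1 entries in this part of the file reuse the same Operation pseudocode but take the MemOp_STORE branch: the selected element is extracted from the register and written to memory, and the register itself is not modified. A plain-Rust sketch of that branch follows; the helper name and the [u8; 16] register model are illustrative assumptions, not core_arch API.

// Model of the MemOp_STORE branch: copy lane `index` of the source
// register out to memory at `addr`.
fn st1_lane_model(mem: &mut [u8], addr: usize, rval: [u8; 16], index: usize, ebytes: usize) {
    // Mem[address + offs, ebytes, AccType_VEC] = Elem[rval, index, esize]
    mem[addr..addr + ebytes].copy_from_slice(&rval[index * ebytes..(index + 1) * ebytes]);
}

As with the loads, the [Xn] forms listed here have wback = FALSE, so the write-back tail of the pseudocode does not run.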

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.16B},[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.16B

Results

void → result
+

Operation

+if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.4H},[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.4H

Results

void → result
+

Operation

+if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.8H},[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.8H

Results

void → result
+

Operation

+if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.2S},[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.2S

Results

void → result
+

Operation

+if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.4S},[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.4S

Results

void → result
+

Operation

+if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.1D},[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.1D

Results

void → result
+

Operation

+if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.2D},[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.2D

Results

void → result
+

Operation

+if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.8B},[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.8B

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.16B},[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.16B

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
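
As a concrete companion to the full-vector form documented above (A64 ST1 {Vt.16B},[Xn]), the following is a minimal Rust sketch using the core::arch::aarch64 NEON intrinsics. The intrinsic name belonging to this particular entry is not shown in this excerpt, so vst1q_u8 and vdupq_n_u8 are an assumed mapping for a 16-byte vector store.

// Minimal sketch (assumed mapping): store all 16 bytes of a Q register,
// corresponding to ST1 {Vt.16B},[Xn] or an equivalent full-register store.
#[cfg(target_arch = "aarch64")]
fn store_16_bytes(dst: &mut [u8; 16]) {
    use core::arch::aarch64::{vdupq_n_u8, vst1q_u8};
    // NEON is mandatory on AArch64, so these intrinsics are callable here.
    unsafe {
        let v = vdupq_n_u8(0xAB);      // Vt.16B with every byte set to 0xAB
        vst1q_u8(dst.as_mut_ptr(), v); // ptr -> Xn, val -> Vt.16B
    }
}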

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.4H},[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.4H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.8H},[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.8H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.2S},[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.2S

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.4S},[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.4S

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.1D},[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.1D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.2D},[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.2D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.1D},[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.1D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.2D},[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.2D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.4H},[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.4H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.8H},[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.8H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.2S},[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.2S

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.4S},[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.4S

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.8B},[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.8B

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.16B},[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.16B

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.4H},[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.4H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.8H},[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.8H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.1D},[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.1D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.2D},[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.2D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.b}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.8B
+0 <= lane <= 7

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
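
For the byte-lane form just documented (ST1 {Vt.b}[lane],[Xn] with val → Vt.8B and lane in 0..=7), a minimal Rust sketch follows; vst1_lane_u8 is assumed to be the corresponding core::arch::aarch64 intrinsic, with the lane index passed as a const generic.

// Minimal sketch (assumed mapping): store lane 3 of an 8-byte D register,
// i.e. ST1 {Vt.b}[3],[Xn].
#[cfg(target_arch = "aarch64")]
fn store_byte_lane(dst: &mut u8) {
    use core::arch::aarch64::{vdup_n_u8, vst1_lane_u8};
    unsafe {
        let v = vdup_n_u8(7);                 // Vt.8B
        vst1_lane_u8::<3>(dst as *mut u8, v); // const LANE must satisfy 0 <= LANE <= 7
    }
}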

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.b}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.16B
+0 <= lane <= 15

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.4H
+0 <= lane <= 3

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.8H
+0 <= lane <= 7

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.s}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.2S
+0 <= lane <= 1

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.s}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.4S
+0 <= lane <= 3

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
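
The single-precision lane form above (ST1 {Vt.s}[lane],[Xn] with val → Vt.4S, lane in 0..=3) can be exercised in the same way; vst1q_lane_f32 is assumed here to be the matching intrinsic.

// Minimal sketch (assumed mapping): store lane 2 of a float32x4_t,
// i.e. ST1 {Vt.s}[2],[Xn].
#[cfg(target_arch = "aarch64")]
fn store_f32_lane(dst: &mut f32) {
    use core::arch::aarch64::{vdupq_n_f32, vst1q_lane_f32};
    unsafe {
        let v = vdupq_n_f32(1.5);                // Vt.4S
        vst1q_lane_f32::<2>(dst as *mut f32, v); // const LANE must satisfy 0 <= LANE <= 3
    }
}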

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.1D
+0 <= lane <= 0

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.2D
+0 <= lane <= 1

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
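
And for the 64-bit lane form above (ST1 {Vt.d}[lane],[Xn] with val → Vt.2D, lane 0 or 1), a sketch under the assumption that vst1q_lane_u64 is the corresponding intrinsic:

// Minimal sketch (assumed mapping): store the upper 64-bit lane of a Q register,
// i.e. ST1 {Vt.d}[1],[Xn].
#[cfg(target_arch = "aarch64")]
fn store_upper_u64_lane(dst: &mut u64) {
    use core::arch::aarch64::{vdupq_n_u64, vst1q_lane_u64};
    unsafe {
        let v = vdupq_n_u64(0xDEAD_BEEF);        // Vt.2D
        vst1q_lane_u64::<1>(dst as *mut u64, v); // const LANE must be 0 or 1
    }
}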

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.b}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.8B
+0 <= lane <= 7

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.b}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.16B
+0 <= lane <= 15

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.4H
+0 <= lane <= 3

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.8H
+0 <= lane <= 7

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.s}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.2S
+0 <= lane <= 1

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.s}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.4S
+0 <= lane <= 3

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
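
Same store on the 128-bit register form (Vt.4S, lane 0 to 3); a short sketch using vst1q_lane_f32 (the s32/u32 q-forms are analogous), again with illustrative data:

#include <arm_neon.h>
#include <stdio.h>

int main(void) {
    const float src[4] = {10.0f, 20.0f, 30.0f, 40.0f};   // illustrative data
    float out = 0.0f;
    float32x4_t v = vld1q_f32(src);   // fill all four lanes
    vst1q_lane_f32(&out, v, 3);       // ST1 {Vt.s}[3],[Xn]
    printf("%f\n", out);              // prints 40.000000
    return 0;
}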

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.1D
+0 <= lane <= 0

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
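
For the Vt.1D form the register holds a single 64-bit element, so the only legal lane index is 0. An illustrative sketch with vst1_lane_s64 (the u64 form is analogous):

#include <arm_neon.h>
#include <stdio.h>
#include <inttypes.h>

int main(void) {
    int64_t out = 0;
    int64x1_t v = vdup_n_s64(-42);   // one 64-bit lane, illustrative value
    vst1_lane_s64(&out, v, 0);       // .1D form: lane must be 0
    printf("%" PRId64 "\n", out);    // prints -42
    return 0;
}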

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.2D
+0 <= lane <= 1

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
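
The 128-bit Vt.2D form allows lane 0 or 1; a sketch with vst1q_lane_u64 (s64 is analogous):

#include <arm_neon.h>
#include <stdio.h>
#include <inttypes.h>

int main(void) {
    uint64_t out = 0;
    uint64x2_t v = vdupq_n_u64(7);   // both lanes hold 7 (illustrative)
    vst1q_lane_u64(&out, v, 1);      // store only the upper 64-bit lane
    printf("%" PRIu64 "\n", out);    // prints 7
    return 0;
}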

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.1D
+0 <= lane <= 0

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.2D
+0 <= lane <= 1

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.4H
+0 <= lane <= 3

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
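
The 16-bit-element forms behave the same way, only ebytes is 2. A sketch with vst1_lane_s16 (the u16 form is analogous); data is illustrative:

#include <arm_neon.h>
#include <stdio.h>

int main(void) {
    const int16_t src[4] = {1, 2, 3, 4};
    int16_t out = 0;
    int16x4_t v = vld1_s16(src);
    vst1_lane_s16(&out, v, 2);    // writes 2 bytes: lane 2, value 3
    printf("%d\n", out);          // prints 3
    return 0;
}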

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.8H
+0 <= lane <= 7

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
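
And the 128-bit 8H form, sketched with vst1q_lane_u16 and illustrative data:

#include <arm_neon.h>
#include <stdio.h>

int main(void) {
    const uint16_t src[8] = {0, 1, 2, 3, 4, 5, 6, 7};   // illustrative data
    uint16_t out = 0;
    uint16x8_t v = vld1q_u16(src);
    vst1q_lane_u16(&out, v, 7);       // store the highest-numbered lane
    printf("%u\n", (unsigned)out);    // prints 7
    return 0;
}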

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.s}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.2S
+0 <= lane <= 1

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.s}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.4S
+0 <= lane <= 3

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.b}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.8B
+0 <= lane <= 7

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
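
For byte elements (Vt.8B, ebytes = 1) the pattern is identical; a sketch with vst1_lane_u8 (s8 is analogous):

#include <arm_neon.h>
#include <stdio.h>

int main(void) {
    const uint8_t src[8] = {10, 11, 12, 13, 14, 15, 16, 17};   // illustrative data
    uint8_t out = 0;
    uint8x8_t v = vld1_u8(src);
    vst1_lane_u8(&out, v, 5);         // single-byte store of lane 5
    printf("%u\n", (unsigned)out);    // prints 15
    return 0;
}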

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.b}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.16B
+0 <= lane <= 15

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
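
The 16B form extends the lane range to 0..15; sketched with vst1q_lane_s8:

#include <arm_neon.h>
#include <stdio.h>

int main(void) {
    int8_t src[16];
    for (int i = 0; i < 16; i++) src[i] = (int8_t)(i * 2);   // illustrative data
    int8_t out = 0;
    int8x16_t v = vld1q_s8(src);
    vst1q_lane_s8(&out, v, 15);   // last lane of the 128-bit register
    printf("%d\n", out);          // prints 30
    return 0;
}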

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.4H
+0 <= lane <= 3

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.8H
+0 <= lane <= 7

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.1D
+0 <= lane <= 0

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64
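
This variant is listed as A64-only because float64 vectors are not part of the A32 intrinsics; a sketch with vst1_lane_f64, assuming an AArch64 toolchain:

#include <arm_neon.h>
#include <stdio.h>

int main(void) {
    double out = 0.0;
    float64x1_t v = vdup_n_f64(3.5);   // single double-precision lane (illustrative value)
    vst1_lane_f64(&out, v, 0);         // .1D form: lane must be 0
    printf("%f\n", out);               // prints 3.500000
    return 0;
}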

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val → Vt.2D
+0 <= lane <= 1

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load multiple 2-element structures to two registers. This instruction loads multiple 2-element structures from memory and writes the result to the two SIMD&FP registers, with de-interleaving.

+

A64 Instruction

LD2 {Vt.8B - Vt2.8B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.8B → result.val[1]
+Vt.8B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
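
These LD2 entries map to the vld2 family: LD2 de-interleaves memory into two registers, so the first element of each 2-element structure lands in val[0] and the second in val[1]. A minimal C sketch with vld2_s8 (the u8 form is analogous); the buffer contents are illustrative:

#include <arm_neon.h>
#include <stdio.h>

int main(void) {
    // Eight interleaved pairs: {0,100}, {1,101}, ..., {7,107}.
    int8_t src[16];
    for (int i = 0; i < 8; i++) {
        src[2 * i] = (int8_t)i;
        src[2 * i + 1] = (int8_t)(100 + i);
    }
    int8x8x2_t r = vld2_s8(src);   // LD2 {Vt.8B - Vt2.8B},[Xn]
    // Pair 3 was {3, 103}.
    printf("%d %d\n", vget_lane_s8(r.val[0], 3), vget_lane_s8(r.val[1], 3));
    return 0;
}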

Description

Load multiple 2-element structures to two registers. This instruction loads multiple 2-element structures from memory and writes the result to the two SIMD&FP registers, with de-interleaving.

+

A64 Instruction

LD2 {Vt.16B - Vt2.16B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.16B → result.val[1]
+Vt.16B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load multiple 2-element structures to two registers. This instruction loads multiple 2-element structures from memory and writes the result to the two SIMD&FP registers, with de-interleaving.

+

A64 Instruction

LD2 {Vt.4H - Vt2.4H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.4H → result.val[1]
+Vt.4H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load multiple 2-element structures to two registers. This instruction loads multiple 2-element structures from memory and writes the result to the two SIMD&FP registers, with de-interleaving.

+

A64 Instruction

LD2 {Vt.8H - Vt2.8H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.8H → result.val[1]
+Vt.8H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
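
The 128-bit 8H form loads eight 2-element structures per instruction; sketched with vld2q_s16 (illustrative data):

#include <arm_neon.h>
#include <stdio.h>

int main(void) {
    int16_t src[16];
    for (int i = 0; i < 8; i++) {
        src[2 * i] = (int16_t)i;               // first element of each pair
        src[2 * i + 1] = (int16_t)(1000 + i);  // second element of each pair
    }
    int16x8x2_t r = vld2q_s16(src);   // LD2 {Vt.8H - Vt2.8H},[Xn]
    printf("%d %d\n", vgetq_lane_s16(r.val[0], 7), vgetq_lane_s16(r.val[1], 0));   // prints 7 1000
    return 0;
}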

Description

Load multiple 2-element structures to two registers. This instruction loads multiple 2-element structures from memory and writes the result to the two SIMD&FP registers, with de-interleaving.

+

A64 Instruction

LD2 {Vt.2S - Vt2.2S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.2S → result.val[1]
+Vt.2S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load multiple 2-element structures to two registers. This instruction loads multiple 2-element structures from memory and writes the result to the two SIMD&FP registers, with de-interleaving.

+

A64 Instruction

LD2 {Vt.4S - Vt2.4S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.4S → result.val[1]
+Vt.4S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
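
A common use of the 4S form is splitting interleaved float pairs, for example complex numbers stored as re,im; a sketch with vld2q_f32 (illustrative data):

#include <arm_neon.h>
#include <stdio.h>

int main(void) {
    // Four (re, im) pairs stored interleaved.
    const float src[8] = {1.0f, -1.0f, 2.0f, -2.0f, 3.0f, -3.0f, 4.0f, -4.0f};
    float32x4x2_t c = vld2q_f32(src);   // c.val[0] = real parts, c.val[1] = imaginary parts
    float re[4], im[4];
    vst1q_f32(re, c.val[0]);
    vst1q_f32(im, c.val[1]);
    printf("%f %f\n", re[2], im[2]);    // prints 3.000000 -3.000000
    return 0;
}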

Description

Load multiple 2-element structures to two registers. This instruction loads multiple 2-element structures from memory and writes the result to the two SIMD&FP registers, with de-interleaving.

+

A64 Instruction

LD2 {Vt.8B - Vt2.8B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.8B → result.val[1]
+Vt.8B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load multiple 2-element structures to two registers. This instruction loads multiple 2-element structures from memory and writes the result to the two SIMD&FP registers, with de-interleaving.

+

A64 Instruction

LD2 {Vt.16B - Vt2.16B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.16B → result.val[1]
+Vt.16B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
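
The 16B form is convenient for splitting an interleaved two-channel byte stream; a sketch with vld2q_u8 (illustrative data):

#include <arm_neon.h>
#include <stdio.h>

int main(void) {
    uint8_t src[32];
    for (int i = 0; i < 16; i++) {
        src[2 * i] = (uint8_t)i;             // channel 0
        src[2 * i + 1] = (uint8_t)(200 - i); // channel 1
    }
    uint8x16x2_t r = vld2q_u8(src);   // LD2 {Vt.16B - Vt2.16B},[Xn]
    uint8_t ch0[16], ch1[16];
    vst1q_u8(ch0, r.val[0]);
    vst1q_u8(ch1, r.val[1]);
    printf("%u %u\n", (unsigned)ch0[15], (unsigned)ch1[15]);   // prints 15 185
    return 0;
}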

Description

Load multiple 2-element structures to two registers. This instruction loads multiple 2-element structures from memory and writes the result to the two SIMD&FP registers, with de-interleaving.

+

A64 Instruction

LD2 {Vt.4H - Vt2.4H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.4H → result.val[1]
+Vt.4H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load multiple 2-element structures to two registers. This instruction loads multiple 2-element structures from memory and writes the result to the two SIMD&FP registers, with de-interleaving.

+

A64 Instruction

LD2 {Vt.8H - Vt2.8H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.8H → result.val[1]
+Vt.8H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load multiple 2-element structures to two registers. This instruction loads multiple 2-element structures from memory and writes the result to the two SIMD&FP registers, with de-interleaving.

+

A64 Instruction

LD2 {Vt.2S - Vt2.2S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.2S → result.val[1]
+Vt.2S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load multiple 2-element structures to two registers. This instruction loads multiple 2-element structures from memory and writes the result to the two SIMD&FP registers, with de-interleaving.

+

A64 Instruction

LD2 {Vt.4S - Vt2.4S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.4S → result.val[1]
+Vt.4S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load multiple 2-element structures to two registers. This instruction loads multiple 2-element structures from memory and writes the result to the two SIMD&FP registers, with de-interleaving.

+

A64 Instruction

LD2 {Vt.4H - Vt2.4H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.4H → result.val[1]
+Vt.4H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load multiple 2-element structures to two registers. This instruction loads multiple 2-element structures from memory and writes the result to the two SIMD&FP registers, with de-interleaving.

+

A64 Instruction

LD2 {Vt.8H - Vt2.8H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.8H → result.val[1]
+Vt.8H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load multiple 2-element structures to two registers. This instruction loads multiple 2-element structures from memory and writes the result to the two SIMD&FP registers, with de-interleaving.

+

A64 Instruction

LD2 {Vt.2S - Vt2.2S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.2S → result.val[1]
+Vt.2S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load multiple 2-element structures to two registers. This instruction loads multiple 2-element structures from memory and writes the result to the two SIMD&FP registers, with de-interleaving.

+

A64 Instruction

LD2 {Vt.4S - Vt2.4S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.4S → result.val[1]
+Vt.4S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load multiple 2-element structures to two registers. This instruction loads multiple 2-element structures from memory and writes the result to the two SIMD&FP registers, with de-interleaving.

+

A64 Instruction

LD2 {Vt.8B - Vt2.8B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.8B → result.val[1]
+Vt.8B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load multiple 2-element structures to two registers. This instruction loads multiple 2-element structures from memory and writes the result to the two SIMD&FP registers, with de-interleaving.

+

A64 Instruction

LD2 {Vt.16B - Vt2.16B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.16B → result.val[1]
+Vt.16B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load multiple 2-element structures to two registers. This instruction loads multiple 2-element structures from memory and writes the result to the two SIMD&FP registers, with de-interleaving.

+

A64 Instruction

LD2 {Vt.4H - Vt2.4H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.4H → result.val[1]
+Vt.4H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD2 {Vt.8H - Vt2.8H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.8H → result.val[1]
+Vt.8H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.1D - Vt2.1D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.1D → result.val[1]
+Vt.1D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+
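
For the LD1 {Vt.1D - Vt2.1D} pair-load entries, the two destination registers come back to C as the .val[0]/.val[1] fields of the returned structure, exactly as listed under Results. A minimal usage sketch, assuming the AArch64 ACLE intrinsic vld1_u64_x2 from <arm_neon.h> (the intrinsic choice and the sample arithmetic are assumptions for illustration, not taken from the entry itself):

#include <arm_neon.h>
#include <stdint.h>

uint64_t lo_plus_hi(const uint64_t buf[2])
{
    /* LD1 {Vt.1D - Vt2.1D},[Xn]: buf -> Xn, two 1D registers loaded */
    uint64x1x2_t r = vld1_u64_x2(buf);   /* r.val[0] <- Vt, r.val[1] <- Vt2 */
    return vget_lane_u64(r.val[0], 0) + vget_lane_u64(r.val[1], 0);
}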

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.1D - Vt2.1D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.1D → result.val[1]
+Vt.1D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.1D - Vt2.1D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.1D → result.val[1]
+Vt.1D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A32/A64

Description

Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD2 {Vt.2D - Vt2.2D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.2D → result.val[1]
+Vt.2D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+
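
The "if wback then ..." tail shared by these Operation blocks is the post-index form: after the structure has been transferred, the base register (Xn or SP) is advanced either by the total number of bytes transferred or, when a register offset is supplied, by Xm. A small C sketch of just that address-update rule; the variable names mirror the pseudocode and nothing here is a real API.

#include <stdint.h>
#include <stdbool.h>

/* Post-index base update from the pseudocode's writeback tail.
 * xm_value stands in for X[m]; bytes_transferred is the offs
 * accumulated by the load/store loop. Illustration only. */
static uint64_t wback_address(uint64_t address, bool use_reg_offset,
                              uint64_t xm_value, uint64_t bytes_transferred)
{
    uint64_t offs = bytes_transferred;
    if (use_reg_offset)        /* if m != 31 then offs = X[m] */
        offs = xm_value;
    return address + offs;     /* written back to SP or X[n]  */
}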

Supported architectures

A64

Description

Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD2 {Vt.2D - Vt2.2D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.2D → result.val[1]
+Vt.2D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD2 {Vt.2D - Vt2.2D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.2D → result.val[1]
+Vt.2D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.1D - Vt2.1D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.1D → result.val[1]
+Vt.1D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD2 {Vt.2D - Vt2.2D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.2D → result.val[1]
+Vt.2D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.8B - Vt3.8B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.8B → result.val[2]
+Vt2.8B → result.val[1]
+Vt.8B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+
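
For the LD3 single-structure entries that follow, the C-level intrinsics take the previous register contents as an extra argument, because only one lane of each register is overwritten and the other lanes must be preserved, as the Description says. A hedged sketch, assuming the ACLE intrinsic vld3_lane_u8 from <arm_neon.h>; the intrinsic choice and the lane number are assumptions for illustration.

#include <arm_neon.h>
#include <stdint.h>

uint8x8x3_t load_third_lane(const uint8_t rgb[3], uint8x8x3_t prev)
{
    /* LD3 {Vt.8B - Vt3.8B}[2],[Xn]: one 3-element structure is loaded
     * into lane 2 of each of the three registers; the other lanes of
     * prev.val[0..2] are left untouched. */
    return vld3_lane_u8(rgb, prev, 2);
}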

Supported architectures

v7/A32/A64

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.16B - Vt3.16B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.16B → result.val[2]
+Vt2.16B → result.val[1]
+Vt.16B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.4H - Vt3.4H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.4H → result.val[2]
+Vt2.4H → result.val[1]
+Vt.4H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.8H - Vt3.8H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.8H → result.val[2]
+Vt2.8H → result.val[1]
+Vt.8H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.2S - Vt3.2S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.2S → result.val[2]
+Vt2.2S → result.val[1]
+Vt.2S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+
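
The "if replicate then" branch of the same pseudocode covers the load-and-replicate (LDnR) forms: each element read from memory is broadcast across every lane of its destination register. A scalar C model of that branch, with invented names as in the earlier load-one-lane sketch.

#include <stdint.h>
#include <string.h>

typedef struct { uint8_t b[16]; } Vreg;   /* stand-in for a V register */

/* Model of the replicate branch:
 * V[t] = Replicate(element, datasize DIV esize). Illustration only. */
static void ld_replicate_model(Vreg regs[32], const uint8_t *mem,
                               int t, int esize, int selem, int datasize)
{
    const int ebytes = esize / 8;
    const int lanes  = datasize / esize;
    size_t offs = 0;
    for (int s = 0; s < selem; s++) {
        for (int lane = 0; lane < lanes; lane++)   /* replicate element */
            memcpy(&regs[t].b[lane * ebytes], mem + offs, ebytes);
        offs += ebytes;
        t = (t + 1) % 32;
    }
}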

Supported architectures

v7/A32/A64

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.4S - Vt3.4S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.4S → result.val[2]
+Vt2.4S → result.val[1]
+Vt.4S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.8B - Vt3.8B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.8B → result.val[2]
+Vt2.8B → result.val[1]
+Vt.8B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.16B - Vt3.16B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.16B → result.val[2]
+Vt2.16B → result.val[1]
+Vt.16B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.4H - Vt3.4H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.4H → result.val[2]
+Vt2.4H → result.val[1]
+Vt.4H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.8H - Vt3.8H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.8H → result.val[2]
+Vt2.8H → result.val[1]
+Vt.8H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.2S - Vt3.2S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.2S → result.val[2]
+Vt2.2S → result.val[1]
+Vt.2S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.4S - Vt3.4S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.4S → result.val[2]
+Vt2.4S → result.val[1]
+Vt.4S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.4H - Vt3.4H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.4H → result.val[2]
+Vt2.4H → result.val[1]
+Vt.4H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.8H - Vt3.8H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.8H → result.val[2]
+Vt2.8H → result.val[1]
+Vt.8H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.2S - Vt3.2S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.2S → result.val[2]
+Vt2.2S → result.val[1]
+Vt.2S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.4S - Vt3.4S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.4S → result.val[2]
+Vt2.4S → result.val[1]
+Vt.4S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.8B - Vt3.8B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.8B → result.val[2]
+Vt2.8B → result.val[1]
+Vt.8B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.16B - Vt3.16B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.16B → result.val[2]
+Vt2.16B → result.val[1]
+Vt.16B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.4H - Vt3.4H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.4H → result.val[2]
+Vt2.4H → result.val[1]
+Vt.4H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.8H - Vt3.8H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.8H → result.val[2]
+Vt2.8H → result.val[1]
+Vt.8H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.1D - Vt3.1D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.1D → result.val[2]
+Vt2.1D → result.val[1]
+Vt.1D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+
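
The LD1 {Vt.1D - Vt3.1D} entries are the three-register variant of the pair load shown earlier; the Results mapping again places Vt..Vt3 into .val[0]..val[2]. A usage sketch, assuming the ACLE intrinsic vld1_u64_x3 from <arm_neon.h> (an assumption, mirroring the earlier vld1_u64_x2 example):

#include <arm_neon.h>
#include <stdint.h>

uint64_t sum3(const uint64_t buf[3])
{
    /* LD1 {Vt.1D - Vt3.1D},[Xn]: buf -> Xn, three 1D registers loaded */
    uint64x1x3_t r = vld1_u64_x3(buf);
    return vget_lane_u64(r.val[0], 0)
         + vget_lane_u64(r.val[1], 0)
         + vget_lane_u64(r.val[2], 0);
}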

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.1D - Vt3.1D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.1D → result.val[2]
+Vt2.1D → result.val[1]
+Vt.1D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.1D - Vt3.1D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.1D → result.val[2]
+Vt2.1D → result.val[1]
+Vt.1D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A32/A64

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.2D - Vt3.2D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.2D → result.val[2]
+Vt2.2D → result.val[1]
+Vt.2D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+
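For the .2D form of LD3 above, a hedged usage sketch under the assumption that it maps to vld3q_u64 in core::arch::aarch64 (name and availability assumed):

// Assumed mapping (not stated in this patch): LD3 {Vt.2D - Vt3.2D},[Xn]
// surfaced as core::arch::aarch64::vld3q_u64.
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn load_u64_ld3(data: &[u64; 6]) -> core::arch::aarch64::uint64x2x3_t {
    // .0, .1, .2 correspond to result.val[0], result.val[1], result.val[2]
    core::arch::aarch64::vld3q_u64(data.as_ptr())
}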

Supported architectures

A64

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.2D - Vt3.2D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.2D → result.val[2]
+Vt2.2D → result.val[1]
+Vt.2D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.2D - Vt3.2D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.2D → result.val[2]
+Vt2.2D → result.val[1]
+Vt.2D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.1D - Vt3.1D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.1D → result.val[2]
+Vt2.1D → result.val[1]
+Vt.1D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.2D - Vt3.2D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.2D → result.val[2]
+Vt2.2D → result.val[1]
+Vt.2D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.8B - Vt4.8B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.8B → result.val[3]
+Vt3.8B → result.val[2]
+Vt2.8B → result.val[1]
+Vt.8B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+
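A usage sketch for the .8B form of LD4 above, assuming the corresponding intrinsic is vld4_u8 in core::arch::aarch64 (an assumed mapping, not stated in this patch):

// Assumed mapping: LD4 {Vt.8B - Vt4.8B},[Xn] surfaced as vld4_u8.
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn load_u8_ld4(data: &[u8; 32]) -> core::arch::aarch64::uint8x8x4_t {
    // 32 contiguous bytes hold eight 4-element structures;
    // .0..=.3 correspond to result.val[0]..result.val[3]
    core::arch::aarch64::vld4_u8(data.as_ptr())
}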

Supported architectures

v7/A32/A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.16B - Vt4.16B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.16B → result.val[3]
+Vt3.16B → result.val[2]
+Vt2.16B → result.val[1]
+Vt.16B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.4H - Vt4.4H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.4H → result.val[3]
+Vt3.4H → result.val[2]
+Vt2.4H → result.val[1]
+Vt.4H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.8H - Vt4.8H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.8H → result.val[3]
+Vt3.8H → result.val[2]
+Vt2.8H → result.val[1]
+Vt.8H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.2S - Vt4.2S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.2S → result.val[3]
+Vt3.2S → result.val[2]
+Vt2.2S → result.val[1]
+Vt.2S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.4S - Vt4.4S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.4S → result.val[3]
+Vt3.4S → result.val[2]
+Vt2.4S → result.val[1]
+Vt.4S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+
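A slightly fuller sketch for the .4S form above, combining run-time feature detection with the assumed vld4q_f32 intrinsic; both the intrinsic name and the std::arch::is_aarch64_feature_detected! path are assumptions about the final crate layout:

// Assumed mapping: LD4 {Vt.4S - Vt4.4S},[Xn] surfaced as vld4q_f32.
#[cfg(target_arch = "aarch64")]
fn deinterleave_f32(data: &[f32; 16]) -> Option<core::arch::aarch64::float32x4x4_t> {
    if std::arch::is_aarch64_feature_detected!("neon") {
        // 16 contiguous f32s hold four 4-element structures; the pointer is valid
        // for the full load, so the intrinsic call is sound after the feature check.
        Some(unsafe { core::arch::aarch64::vld4q_f32(data.as_ptr()) })
    } else {
        None
    }
}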

Supported architectures

v7/A32/A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.8B - Vt4.8B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.8B → result.val[3]
+Vt3.8B → result.val[2]
+Vt2.8B → result.val[1]
+Vt.8B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.16B - Vt4.16B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.16B → result.val[3]
+Vt3.16B → result.val[2]
+Vt2.16B → result.val[1]
+Vt.16B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.4H - Vt4.4H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.4H → result.val[3]
+Vt3.4H → result.val[2]
+Vt2.4H → result.val[1]
+Vt.4H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.8H - Vt4.8H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.8H → result.val[3]
+Vt3.8H → result.val[2]
+Vt2.8H → result.val[1]
+Vt.8H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.2S - Vt4.2S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.2S → result.val[3]
+Vt3.2S → result.val[2]
+Vt2.2S → result.val[1]
+Vt.2S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.4S - Vt4.4S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.4S → result.val[3]
+Vt3.4S → result.val[2]
+Vt2.4S → result.val[1]
+Vt.4S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.4H - Vt4.4H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.4H → result.val[3]
+Vt3.4H → result.val[2]
+Vt2.4H → result.val[1]
+Vt.4H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.8H - Vt4.8H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.8H → result.val[3]
+Vt3.8H → result.val[2]
+Vt2.8H → result.val[1]
+Vt.8H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.2S - Vt4.2S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.2S → result.val[3]
+Vt3.2S → result.val[2]
+Vt2.2S → result.val[1]
+Vt.2S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.4S - Vt4.4S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.4S → result.val[3]
+Vt3.4S → result.val[2]
+Vt2.4S → result.val[1]
+Vt.4S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.8B - Vt4.8B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.8B → result.val[3]
+Vt3.8B → result.val[2]
+Vt2.8B → result.val[1]
+Vt.8B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.16B - Vt4.16B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.16B → result.val[3]
+Vt3.16B → result.val[2]
+Vt2.16B → result.val[1]
+Vt.16B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.4H - Vt4.4H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.4H → result.val[3]
+Vt3.4H → result.val[2]
+Vt2.4H → result.val[1]
+Vt.4H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.8H - Vt4.8H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.8H → result.val[3]
+Vt3.8H → result.val[2]
+Vt2.8H → result.val[1]
+Vt.8H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.1D - Vt4.1D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.1D → result.val[3]
+Vt3.1D → result.val[2]
+Vt2.1D → result.val[1]
+Vt.1D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+
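A usage sketch for the four-register LD1 entry above, assuming it surfaces as vld1_f64_x4 (assumed name and availability):

// Assumed mapping: LD1 {Vt.1D - Vt4.1D},[Xn] surfaced as vld1_f64_x4.
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn load_f64_x4(data: &[f64; 4]) -> core::arch::aarch64::float64x1x4_t {
    // .0..=.3 correspond to result.val[0]..result.val[3] in the Results list above
    core::arch::aarch64::vld1_f64_x4(data.as_ptr())
}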

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.1D - Vt4.1D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.1D → result.val[3]
+Vt3.1D → result.val[2]
+Vt2.1D → result.val[1]
+Vt.1D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.1D - Vt4.1D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.1D → result.val[3]
+Vt3.1D → result.val[2]
+Vt2.1D → result.val[1]
+Vt.1D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A32/A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.2D - Vt4.2D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.2D → result.val[3]
+Vt3.2D → result.val[2]
+Vt2.2D → result.val[1]
+Vt.2D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+
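A usage sketch for the .2D form of LD4 above, assuming a vld4q_u64 intrinsic (assumed mapping; the .2D forms are A64-only):

// Assumed mapping: LD4 {Vt.2D - Vt4.2D},[Xn] surfaced as vld4q_u64.
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn load_u64_ld4(data: &[u64; 8]) -> core::arch::aarch64::uint64x2x4_t {
    // eight contiguous u64s hold two 4-element structures;
    // .0..=.3 correspond to result.val[0]..result.val[3]
    core::arch::aarch64::vld4q_u64(data.as_ptr())
}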

Supported architectures

A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.2D - Vt4.2D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.2D → result.val[3]
+Vt3.2D → result.val[2]
+Vt2.2D → result.val[1]
+Vt.2D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.2D - Vt4.2D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.2D → result.val[3]
+Vt3.2D → result.val[2]
+Vt2.2D → result.val[1]
+Vt.2D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load multiple single-element structures to four registers. This instruction loads multiple single-element structures from memory and writes the result to the four SIMD&FP registers.

+

A64 Instruction

LD1 {Vt.1D - Vt4.1D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.1D → result.val[3]
+Vt3.1D → result.val[2]
+Vt2.1D → result.val[1]
+Vt.1D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64
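
Note that in all of these Operation blocks the final wback section implements the optional post-indexed addressing forms: after the transfer, the base register (Xn or SP) is advanced by the total number of bytes accessed, unless a register offset Xm is supplied (m != 31), in which case the value of Xm is used instead. A minimal Rust sketch of that update, collapsing the SP/Xn distinction into a single returned value; the helper name is invented for illustration.

// Hypothetical model of the `if wback then ...` block above.
fn writeback_model(base: u64, bytes_transferred: u64, xm: Option<u64>) -> u64 {
    // `offs` holds the bytes consumed by the transfer loop unless an
    // index register overrides it (m != 31 in the pseudocode)
    let offs = xm.unwrap_or(bytes_transferred);
    base.wrapping_add(offs)
}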

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.2D - Vt4.2D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.2D → result.val[3]
+Vt3.2D → result.val[2]
+Vt2.2D → result.val[1]
+Vt.2D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 2-element structure and Replicate to all lanes of two registers. This instruction loads a 2-element structure from memory and replicates the structure to all the lanes of the two SIMD&FP registers.

+

A64 Instruction

LD2R {Vt.8B - Vt2.8B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.8B → result.val[1]
+Vt.8B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
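
For the replicating LD2R forms only the replicate branch of the pseudocode runs: each of the two structure elements is broadcast to every lane of its destination register. Below is a minimal Rust model for the .8B arrangement shown here (esize = 8, ebytes = 1, selem = 2, datasize = 64); the helper name and the [u8; 8] register representation are invented for illustration and are not core_arch APIs.

// Hypothetical model of the replicate branch for LD2R with the .8B arrangement.
fn ld2r_8b_model(mem: &[u8], v: &mut [[u8; 8]; 32], t0: usize) {
    let mut t = t0;
    for s in 0..2 {
        // offs == s because ebytes == 1; the element is replicated
        // to fill all eight lanes of the 64-bit register V[t]
        v[t] = [mem[s]; 8];
        t = (t + 1) % 32;
    }
}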

Description

Load single 2-element structure and Replicate to all lanes of two registers. This instruction loads a 2-element structure from memory and replicates the structure to all the lanes of the two SIMD&FP registers.

+

A64 Instruction

LD2R {Vt.16B - Vt2.16B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.16B → result.val[1]
+Vt.16B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 2-element structure and Replicate to all lanes of two registers. This instruction loads a 2-element structure from memory and replicates the structure to all the lanes of the two SIMD&FP registers.

+

A64 Instruction

LD2R {Vt.4H - Vt2.4H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.4H → result.val[1]
+Vt.4H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 2-element structure and Replicate to all lanes of two registers. This instruction loads a 2-element structure from memory and replicates the structure to all the lanes of the two SIMD&FP registers.

+

A64 Instruction

LD2R {Vt.8H - Vt2.8H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.8H → result.val[1]
+Vt.8H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 2-element structure and Replicate to all lanes of two registers. This instruction loads a 2-element structure from memory and replicates the structure to all the lanes of the two SIMD&FP registers.

+

A64 Instruction

LD2R {Vt.2S - Vt2.2S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.2S → result.val[1]
+Vt.2S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 2-element structure and Replicate to all lanes of two registers. This instruction loads a 2-element structure from memory and replicates the structure to all the lanes of the two SIMD&FP registers.

+

A64 Instruction

LD2R {Vt.4S - Vt2.4S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.4S → result.val[1]
+Vt.4S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
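
Across these LD2R variants only esize (and hence ebytes) and the lane count change; the amount of memory read is always selem * ebytes. For the .4S form above that is 2 * (32 DIV 8) = 8 bytes, which is also what a post-indexed form would add to the base register. A tiny illustrative helper makes the arithmetic explicit (the names are made up, not core_arch APIs):

// Bytes read from memory by an LDnR-style replicating load:
// selem elements of esize bits each.
const fn ldnr_struct_bytes(esize_bits: u64, selem: u64) -> u64 {
    selem * (esize_bits / 8)
}

// e.g. LD2R {Vt.4S - Vt2.4S}: 2 * (32 / 8) = 8 bytes per structure
const LD2R_4S_BYTES: u64 = ldnr_struct_bytes(32, 2);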

Description

Load single 2-element structure and Replicate to all lanes of two registers. This instruction loads a 2-element structure from memory and replicates the structure to all the lanes of the two SIMD&FP registers.

+

A64 Instruction

LD2R {Vt.8B - Vt2.8B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.8B → result.val[1]
+Vt.8B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 2-element structure and Replicate to all lanes of two registers. This instruction loads a 2-element structure from memory and replicates the structure to all the lanes of the two SIMD&FP registers.

+

A64 Instruction

LD2R {Vt.16B - Vt2.16B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.16B → result.val[1]
+Vt.16B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 2-element structure and Replicate to all lanes of two registers. This instruction loads a 2-element structure from memory and replicates the structure to all the lanes of the two SIMD&FP registers.

+

A64 Instruction

LD2R {Vt.4H - Vt2.4H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.4H → result.val[1]
+Vt.4H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 2-element structure and Replicate to all lanes of two registers. This instruction loads a 2-element structure from memory and replicates the structure to all the lanes of the two SIMD&FP registers.

+

A64 Instruction

LD2R {Vt.8H - Vt2.8H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.8H → result.val[1]
+Vt.8H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 2-element structure and Replicate to all lanes of two registers. This instruction loads a 2-element structure from memory and replicates the structure to all the lanes of the two SIMD&FP registers.

+

A64 Instruction

LD2R {Vt.2S - Vt2.2S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.2S → result.val[1]
+Vt.2S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 2-element structure and Replicate to all lanes of two registers. This instruction loads a 2-element structure from memory and replicates the structure to all the lanes of the two SIMD&FP registers.

+

A64 Instruction

LD2R {Vt.4S - Vt2.4S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.4S → result.val[1]
+Vt.4S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 2-element structure and Replicate to all lanes of two registers. This instruction loads a 2-element structure from memory and replicates the structure to all the lanes of the two SIMD&FP registers.

+

A64 Instruction

LD2R {Vt.4H - Vt2.4H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.4H → result.val[1]
+Vt.4H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 2-element structure and Replicate to all lanes of two registers. This instruction loads a 2-element structure from memory and replicates the structure to all the lanes of the two SIMD&FP registers.

+

A64 Instruction

LD2R {Vt.8H - Vt2.8H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.8H → result.val[1]
+Vt.8H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 2-element structure and Replicate to all lanes of two registers. This instruction loads a 2-element structure from memory and replicates the structure to all the lanes of the two SIMD&FP registers.

+

A64 Instruction

LD2R {Vt.2S - Vt2.2S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.2S → result.val[1]
+Vt.2S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 2-element structure and Replicate to all lanes of two registers. This instruction loads a 2-element structure from memory and replicates the structure to all the lanes of the two SIMD&FP registers.

+

A64 Instruction

LD2R {Vt.4S - Vt2.4S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.4S → result.val[1]
+Vt.4S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 2-element structure and Replicate to all lanes of two registers. This instruction loads a 2-element structure from memory and replicates the structure to all the lanes of the two SIMD&FP registers.

+

A64 Instruction

LD2R {Vt.8B - Vt2.8B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.8B → result.val[1]
+Vt.8B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 2-element structure and Replicate to all lanes of two registers. This instruction loads a 2-element structure from memory and replicates the structure to all the lanes of the two SIMD&FP registers.

+

A64 Instruction

LD2R {Vt.16B - Vt2.16B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.16B → result.val[1]
+Vt.16B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 2-element structure and Replicate to all lanes of two registers. This instruction loads a 2-element structure from memory and replicates the structure to all the lanes of the two SIMD&FP registers.

+

A64 Instruction

LD2R {Vt.4H - Vt2.4H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.4H → result.val[1]
+Vt.4H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 2-element structure and Replicate to all lanes of two registers. This instruction loads a 2-element structure from memory and replicates the structure to all the lanes of the two SIMD&FP registers.

+

A64 Instruction

LD2R {Vt.8H - Vt2.8H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.8H → result.val[1]
+Vt.8H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 2-element structure and Replicate to all lanes of two registers. This instruction loads a 2-element structure from memory and replicates the structure to all the lanes of the two SIMD&FP registers.

+

A64 Instruction

LD2R {Vt.1D - Vt2.1D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.1D → result.val[1]
+Vt.1D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 2-element structure and Replicate to all lanes of two registers. This instruction loads a 2-element structure from memory and replicates the structure to all the lanes of the two SIMD&FP registers.

+

A64 Instruction

LD2R {Vt.1D - Vt2.1D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.1D → result.val[1]
+Vt.1D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 2-element structure and Replicate to all lanes of two registers. This instruction loads a 2-element structure from memory and replicates the structure to all the lanes of the two SIMD&FP registers.

+

A64 Instruction

LD2R {Vt.1D - Vt2.1D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.1D → result.val[1]
+Vt.1D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A32/A64

Description

Load single 2-element structure and Replicate to all lanes of two registers. This instruction loads a 2-element structure from memory and replicates the structure to all the lanes of the two SIMD&FP registers.

+

A64 Instruction

LD2R {Vt.2D - Vt2.2D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.2D → result.val[1]
+Vt.2D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 2-element structure and Replicate to all lanes of two registers. This instruction loads a 2-element structure from memory and replicates the structure to all the lanes of the two SIMD&FP registers.

+

A64 Instruction

LD2R {Vt.2D - Vt2.2D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.2D → result.val[1]
+Vt.2D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 2-element structure and Replicate to all lanes of two registers. This instruction loads a 2-element structure from memory and replicates the structure to all the lanes of the two SIMD&FP registers.

+

A64 Instruction

LD2R {Vt.2D - Vt2.2D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.2D → result.val[1]
+Vt.2D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 2-element structure and Replicate to all lanes of two registers. This instruction loads a 2-element structure from memory and replicates the structure to all the lanes of the two SIMD&FP registers.

+

A64 Instruction

LD2R {Vt.1D - Vt2.1D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.1D → result.val[1]
+Vt.1D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 2-element structure and Replicate to all lanes of two registers. This instruction loads a 2-element structure from memory and replicates the structure to all the lanes of the two SIMD&FP registers.

+

A64 Instruction

LD2R {Vt.2D - Vt2.2D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.2D → result.val[1]
+Vt.2D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 3-element structure and Replicate to all lanes of three registers. This instruction loads a 3-element structure from memory and replicates the structure to all the lanes of the three SIMD&FP registers.

+

A64 Instruction

LD3R {Vt.8B - Vt3.8B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.8B → result.val[2]
+Vt2.8B → result.val[1]
+Vt.8B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
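
The Operation pseudocode is identical for every LDnR form; only esize, selem and datasize change between entries. A minimal plain-Rust model of the `if replicate then` branch, specialised to the LD3R {Vt.8B - Vt3.8B} case above (esize = 8, ebytes = 1, datasize = 64, selem = 3, no writeback) and with registers modelled as byte arrays, reads:

/// Rough model of the replicate branch of the pseudocode above for the
/// LD3R {Vt.8B - Vt3.8B} case. Not the real implementation, just a sketch.
fn ld3r_8b_model(mem: &[u8]) -> [[u8; 8]; 3] {
    let selem = 3; // three consecutive structure elements are loaded
    let mut regs = [[0u8; 8]; 3];
    let mut offs = 0usize; // offs = Zeros()
    for s in 0..selem {
        // element = Mem[address+offs, ebytes, AccType_VEC]
        let element = mem[offs];
        // V[t] = Replicate(element, datasize DIV esize): broadcast to all 8 lanes
        regs[s] = [element; 8];
        // offs = offs + ebytes
        offs += 1;
        // t = (t + 1) MOD 32 becomes simply advancing to the next output register
    }
    regs
}

The hardware differs only in that the three destinations are consecutive SIMD&FP registers taken modulo 32 and the loads are vector-class memory accesses.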

Description

Load single 3-element structure and Replicate to all lanes of three registers. This instruction loads a 3-element structure from memory and replicates the structure to all the lanes of the three SIMD&FP registers.

+

A64 Instruction

LD3R {Vt.16B - Vt3.16B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.16B → result.val[2]
+Vt2.16B → result.val[1]
+Vt.16B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
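
A sketch of the .16B form above, assuming it is reached through an intrinsic named vld3q_dup_u8 (assumed; the name is not shown in this span) and using vst1q_u8 to observe the broadcast:

#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn ld3r_16b_demo(bytes: &[u8; 3]) -> bool {
    use core::arch::aarch64::{vld3q_dup_u8, vst1q_u8};
    // Load the 3-byte structure and broadcast each byte to all 16 lanes of its
    // own register (tuple fields .0, .1, .2 are the doc's val[0], val[1], val[2]).
    let v = vld3q_dup_u8(bytes.as_ptr());
    // Store the second register back to memory to check the broadcast.
    let mut out = [0u8; 16];
    vst1q_u8(out.as_mut_ptr(), v.1);
    out.iter().all(|&b| b == bytes[1])
}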

Description

Load single 3-element structure and Replicate to all lanes of three registers. This instruction loads a 3-element structure from memory and replicates the structure to all the lanes of the three SIMD&FP registers.

+

A64 Instruction

LD3R {Vt.4H - Vt3.4H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.4H → result.val[2]
+Vt2.4H → result.val[1]
+Vt.4H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 3-element structure and Replicate to all lanes of three registers. This instruction loads a 3-element structure from memory and replicates the structure to all the lanes of the three SIMD&FP registers.

+

A64 Instruction

LD3R {Vt.8H - Vt3.8H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.8H → result.val[2]
+Vt2.8H → result.val[1]
+Vt.8H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 3-element structure and Replicate to all lanes of three registers. This instruction loads a 3-element structure from memory and replicates the structure to all the lanes of the three SIMD&FP registers.

+

A64 Instruction

LD3R {Vt.2S - Vt3.2S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.2S → result.val[2]
+Vt2.2S → result.val[1]
+Vt.2S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 3-element structure and Replicate to all lanes of three registers. This instruction loads a 3-element structure from memory and replicates the structure to all the lanes of the three SIMD&FP registers.

+

A64 Instruction

LD3R {Vt.4S - Vt3.4S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.4S → result.val[2]
+Vt2.4S → result.val[1]
+Vt.4S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 3-element structure and Replicate to all lanes of three registers. This instruction loads a 3-element structure from memory and replicates the structure to all the lanes of the three SIMD&FP registers.

+

A64 Instruction

LD3R {Vt.8B - Vt3.8B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.8B → result.val[2]
+Vt2.8B → result.val[1]
+Vt.8B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 3-element structure and Replicate to all lanes of three registers. This instruction loads a 3-element structure from memory and replicates the structure to all the lanes of the three SIMD&FP registers.

+

A64 Instruction

LD3R {Vt.16B - Vt3.16B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.16B → result.val[2]
+Vt2.16B → result.val[1]
+Vt.16B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 3-element structure and Replicate to all lanes of three registers. This instruction loads a 3-element structure from memory and replicates the structure to all the lanes of the three SIMD&FP registers.

+

A64 Instruction

LD3R {Vt.4H - Vt3.4H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.4H → result.val[2]
+Vt2.4H → result.val[1]
+Vt.4H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 3-element structure and Replicate to all lanes of three registers. This instruction loads a 3-element structure from memory and replicates the structure to all the lanes of the three SIMD&FP registers.

+

A64 Instruction

LD3R {Vt.8H - Vt3.8H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.8H → result.val[2]
+Vt2.8H → result.val[1]
+Vt.8H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 3-element structure and Replicate to all lanes of three registers. This instruction loads a 3-element structure from memory and replicates the structure to all the lanes of the three SIMD&FP registers.

+

A64 Instruction

LD3R {Vt.2S - Vt3.2S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.2S → result.val[2]
+Vt2.2S → result.val[1]
+Vt.2S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 3-element structure and Replicate to all lanes of three registers. This instruction loads a 3-element structure from memory and replicates the structure to all the lanes of the three SIMD&FP registers.

+

A64 Instruction

LD3R {Vt.4S - Vt3.4S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.4S → result.val[2]
+Vt2.4S → result.val[1]
+Vt.4S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 3-element structure and Replicate to all lanes of three registers. This instruction loads a 3-element structure from memory and replicates the structure to all the lanes of the three SIMD&FP registers.

+

A64 Instruction

LD3R {Vt.4H - Vt3.4H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.4H → result.val[2]
+Vt2.4H → result.val[1]
+Vt.4H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 3-element structure and Replicate to all lanes of three registers. This instruction loads a 3-element structure from memory and replicates the structure to all the lanes of the three SIMD&FP registers.

+

A64 Instruction

LD3R {Vt.8H - Vt3.8H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.8H → result.val[2]
+Vt2.8H → result.val[1]
+Vt.8H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 3-element structure and Replicate to all lanes of three registers. This instruction loads a 3-element structure from memory and replicates the structure to all the lanes of the three SIMD&FP registers.

+

A64 Instruction

LD3R {Vt.2S - Vt3.2S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.2S → result.val[2]
+Vt2.2S → result.val[1]
+Vt.2S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 3-element structure and Replicate to all lanes of three registers. This instruction loads a 3-element structure from memory and replicates the structure to all the lanes of the three SIMD&FP registers.

+

A64 Instruction

LD3R {Vt.4S - Vt3.4S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.4S → result.val[2]
+Vt2.4S → result.val[1]
+Vt.4S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
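
The word-sized .4S form is the one typically used to broadcast small float or 32-bit integer structures. A sketch assuming the mapping is vld3q_dup_f32 (assumed name) and that the intrinsic is available in core::arch::aarch64:

#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn splat_rgb(rgb: &[f32; 3]) -> core::arch::aarch64::float32x4x3_t {
    use core::arch::aarch64::vld3q_dup_f32;
    // One LD3R {Vt.4S - Vt3.4S}: r is replicated into all four lanes of the
    // first register, g into the second, b into the third.
    vld3q_dup_f32(rgb.as_ptr())
}

After the call, every lane of .0 holds r, every lane of .1 holds g and every lane of .2 holds b, so the triple can be compared or blended against four packed pixels per iteration without further shuffling.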

Description

Load single 3-element structure and Replicate to all lanes of three registers. This instruction loads a 3-element structure from memory and replicates the structure to all the lanes of the three SIMD&FP registers.

+

A64 Instruction

LD3R {Vt.8B - Vt3.8B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.8B → result.val[2]
+Vt2.8B → result.val[1]
+Vt.8B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 3-element structure and Replicate to all lanes of three registers. This instruction loads a 3-element structure from memory and replicates the structure to all the lanes of the three SIMD&FP registers.

+

A64 Instruction

LD3R {Vt.16B - Vt3.16B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.16B → result.val[2]
+Vt2.16B → result.val[1]
+Vt.16B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 3-element structure and Replicate to all lanes of three registers. This instruction loads a 3-element structure from memory and replicates the structure to all the lanes of the three SIMD&FP registers.

+

A64 Instruction

LD3R {Vt.4H - Vt3.4H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.4H → result.val[2]
+Vt2.4H → result.val[1]
+Vt.4H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 3-element structure and Replicate to all lanes of three registers. This instruction loads a 3-element structure from memory and replicates the structure to all the lanes of the three SIMD&FP registers.

+

A64 Instruction

LD3R {Vt.8H - Vt3.8H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.8H → result.val[2]
+Vt2.8H → result.val[1]
+Vt.8H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 3-element structure and Replicate to all lanes of three registers. This instruction loads a 3-element structure from memory and replicates the structure to all the lanes of the three SIMD&FP registers.

+

A64 Instruction

LD3R {Vt.1D - Vt3.1D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.1D → result.val[2]
+Vt2.1D → result.val[1]
+Vt.1D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
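
The .1D forms are a degenerate case: each destination is a 64-bit vector with a single lane, so "replicate to all lanes" is a broadcast to one lane and the instruction behaves like three consecutive 64-bit loads, one per output register. A sketch assuming the mapping is vld3_dup_s64 (assumed name):

#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn ld3r_1d(src: *const i64) -> core::arch::aarch64::int64x1x3_t {
    use core::arch::aarch64::vld3_dup_s64;
    // .1D vectors hold a single 64-bit lane, so the replicate is a plain load
    // of three consecutive i64 values into the three single-lane registers.
    vld3_dup_s64(src)
}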

Description

Load single 3-element structure and Replicate to all lanes of three registers. This instruction loads a 3-element structure from memory and replicates the structure to all the lanes of the three SIMD&FP registers.

+

A64 Instruction

LD3R {Vt.1D - Vt3.1D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.1D → result.val[2]
+Vt2.1D → result.val[1]
+Vt.1D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 3-element structure and Replicate to all lanes of three registers. This instruction loads a 3-element structure from memory and replicates the structure to all the lanes of the three SIMD&FP registers.

+

A64 Instruction

LD3R {Vt.1D - Vt3.1D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.1D → result.val[2]
+Vt2.1D → result.val[1]
+Vt.1D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A32/A64

Description

Load single 3-element structure and Replicate to all lanes of three registers. This instruction loads a 3-element structure from memory and replicates the structure to all the lanes of the three SIMD&FP registers.

+

A64 Instruction

LD3R {Vt.2D - Vt3.2D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.2D → result.val[2]
+Vt2.2D → result.val[1]
+Vt.2D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64
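
Note the "Supported architectures: A64" above: the 64-bit-element replicate forms (.2D) exist only in AArch64 state, unlike the narrower forms marked v7/A32/A64. A sketch assuming the mapping is vld3q_dup_u64 (assumed name):

#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn ld3r_2d(src: *const u64) -> core::arch::aarch64::uint64x2x3_t {
    use core::arch::aarch64::vld3q_dup_u64;
    // A64-only form: each of the three 64-bit structure members is replicated
    // into both lanes of its own 128-bit destination register.
    vld3q_dup_u64(src)
}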

Description

Load single 3-element structure and Replicate to all lanes of three registers. This instruction loads a 3-element structure from memory and replicates the structure to all the lanes of the three SIMD&FP registers.

+

A64 Instruction

LD3R {Vt.2D - Vt3.2D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.2D → result.val[2]
+Vt2.2D → result.val[1]
+Vt.2D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 3-element structure and Replicate to all lanes of three registers. This instruction loads a 3-element structure from memory and replicates the structure to all the lanes of the three SIMD&FP registers.

+

A64 Instruction

LD3R {Vt.2D - Vt3.2D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.2D → result.val[2]
+Vt2.2D → result.val[1]
+Vt.2D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 3-element structure and Replicate to all lanes of three registers. This instruction loads a 3-element structure from memory and replicates the structure to all the lanes of the three SIMD&FP registers.

+

A64 Instruction

LD3R {Vt.1D - Vt3.1D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.1D → result.val[2]
+Vt2.1D → result.val[1]
+Vt.1D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 3-element structure and Replicate to all lanes of three registers. This instruction loads a 3-element structure from memory and replicates the structure to all the lanes of the three SIMD&FP registers.

+

A64 Instruction

LD3R {Vt.2D - Vt3.2D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.2D → result.val[2]
+Vt2.2D → result.val[1]
+Vt.2D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 4-element structure and Replicate to all lanes of four registers. This instruction loads a 4-element structure from memory and replicates the structure to all the lanes of the four SIMD&FP registers.

+

A64 Instruction

LD4R {Vt.8B - Vt4.8B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.8B → result.val[3]
+Vt3.8B → result.val[2]
+Vt2.8B → result.val[1]
+Vt.8B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
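
A practical sketch of the 4-element byte form above, for example broadcasting one RGBA pixel so it can be compared against eight packed pixels at a time. The intrinsic name vld4_dup_u8 is assumed (it is not shown in this span):

#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn splat_rgba(px: &[u8; 4]) -> core::arch::aarch64::uint8x8x4_t {
    use core::arch::aarch64::vld4_dup_u8;
    // One LD4R {Vt.8B - Vt4.8B}: R, G, B and A each end up broadcast across
    // all eight lanes of their own 64-bit register (tuple fields .0 through .3).
    vld4_dup_u8(px.as_ptr())
}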

Description

Load single 4-element structure and Replicate to all lanes of four registers. This instruction loads a 4-element structure from memory and replicates the structure to all the lanes of the four SIMD&FP registers.

+

A64 Instruction

LD4R {Vt.16B - Vt4.16B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.16B → result.val[3]
+Vt3.16B → result.val[2]
+Vt2.16B → result.val[1]
+Vt.16B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure and Replicate to all lanes of four registers. This instruction loads a 4-element structure from memory and replicates the structure to all the lanes of the four SIMD&FP registers.

+

A64 Instruction

LD4R {Vt.4H - Vt4.4H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.4H → result.val[3]
+Vt3.4H → result.val[2]
+Vt2.4H → result.val[1]
+Vt.4H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure and Replicate to all lanes of four registers. This instruction loads a 4-element structure from memory and replicates the structure to all the lanes of the four SIMD&FP registers.

+

A64 Instruction

LD4R {Vt.8H - Vt4.8H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.8H → result.val[3]
+Vt3.8H → result.val[2]
+Vt2.8H → result.val[1]
+Vt.8H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure and Replicate to all lanes of four registers. This instruction loads a 4-element structure from memory and replicates the structure to all the lanes of the four SIMD&FP registers.

+

A64 Instruction

LD4R {Vt.2S - Vt4.2S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.2S → result.val[3]
+Vt3.2S → result.val[2]
+Vt2.2S → result.val[1]
+Vt.2S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure and Replicate to all lanes of four registers. This instruction loads a 4-element structure from memory and replicates the structure to all the lanes of the four SIMD&FP registers.

+

A64 Instruction

LD4R {Vt.4S - Vt4.4S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.4S → result.val[3]
+Vt3.4S → result.val[2]
+Vt2.4S → result.val[1]
+Vt.4S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
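
A usage-flavoured sketch for the 4S form, assuming the corresponding ACLE intrinsic is vld4q_dup_f32 (the s32/u32 variants are analogous); the function name is illustrative. Broadcasting four packed filter coefficients once keeps each coefficient in its own vector for a later multiply loop.

#include <arm_neon.h>

/* LD4R, 4S form: c.val[i] ends up holding coeff[i] in all 4 lanes,
   ready to be multiplied against vectors of samples. */
float32x4x4_t load_coeffs(const float coeff[4])
{
    float32x4x4_t c = vld4q_dup_f32(coeff);
    return c;
}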

Description

Load single 4-element structure and Replicate to all lanes of four registers. This instruction loads a 4-element structure from memory and replicates the structure to all the lanes of the four SIMD&FP registers.

+

A64 Instruction

LD4R {Vt.8B - Vt4.8B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.8B → result.val[3]
+Vt3.8B → result.val[2]
+Vt2.8B → result.val[1]
+Vt.8B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure and Replicate to all lanes of four registers. This instruction loads a 4-element structure from memory and replicates the structure to all the lanes of the four SIMD&FP registers.

+

A64 Instruction

LD4R {Vt.16B - Vt4.16B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.16B → result.val[3]
+Vt3.16B → result.val[2]
+Vt2.16B → result.val[1]
+Vt.16B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure and Replicate to all lanes of four registers. This instruction loads a 4-element structure from memory and replicates the structure to all the lanes of the four SIMD&FP registers.

+

A64 Instruction

LD4R {Vt.4H - Vt4.4H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.4H → result.val[3]
+Vt3.4H → result.val[2]
+Vt2.4H → result.val[1]
+Vt.4H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure and Replicate to all lanes of four registers. This instruction loads a 4-element structure from memory and replicates the structure to all the lanes of the four SIMD&FP registers.

+

A64 Instruction

LD4R {Vt.8H - Vt4.8H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.8H → result.val[3]
+Vt3.8H → result.val[2]
+Vt2.8H → result.val[1]
+Vt.8H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure and Replicate to all lanes of four registers. This instruction loads a 4-element structure from memory and replicates the structure to all the lanes of the four SIMD&FP registers.

+

A64 Instruction

LD4R {Vt.2S - Vt4.2S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.2S → result.val[3]
+Vt3.2S → result.val[2]
+Vt2.2S → result.val[1]
+Vt.2S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure and Replicate to all lanes of four registers. This instruction loads a 4-element structure from memory and replicates the structure to all the lanes of the four SIMD&FP registers.

+

A64 Instruction

LD4R {Vt.4S - Vt4.4S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.4S → result.val[3]
+Vt3.4S → result.val[2]
+Vt2.4S → result.val[1]
+Vt.4S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure and Replicate to all lanes of four registers. This instruction loads a 4-element structure from memory and replicates the structure to all the lanes of the four SIMD&FP registers.

+

A64 Instruction

LD4R {Vt.4H - Vt4.4H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.4H → result.val[3]
+Vt3.4H → result.val[2]
+Vt2.4H → result.val[1]
+Vt.4H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure and Replicate to all lanes of four registers. This instruction loads a 4-element structure from memory and replicates the structure to all the lanes of the four SIMD&FP registers.

+

A64 Instruction

LD4R {Vt.8H - Vt4.8H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.8H → result.val[3]
+Vt3.8H → result.val[2]
+Vt2.8H → result.val[1]
+Vt.8H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure and Replicate to all lanes of four registers. This instruction loads a 4-element structure from memory and replicates the structure to all the lanes of the four SIMD&FP registers.

+

A64 Instruction

LD4R {Vt.2S - Vt4.2S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.2S → result.val[3]
+Vt3.2S → result.val[2]
+Vt2.2S → result.val[1]
+Vt.2S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure and Replicate to all lanes of four registers. This instruction loads a 4-element structure from memory and replicates the structure to all the lanes of the four SIMD&FP registers.

+

A64 Instruction

LD4R {Vt.4S - Vt4.4S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.4S → result.val[3]
+Vt3.4S → result.val[2]
+Vt2.4S → result.val[1]
+Vt.4S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure and Replicate to all lanes of four registers. This instruction loads a 4-element structure from memory and replicates the structure to all the lanes of the four SIMD&FP registers.

+

A64 Instruction

LD4R {Vt.8B - Vt4.8B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.8B → result.val[3]
+Vt3.8B → result.val[2]
+Vt2.8B → result.val[1]
+Vt.8B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure and Replicate to all lanes of four registers. This instruction loads a 4-element structure from memory and replicates the structure to all the lanes of the four SIMD&FP registers.

+

A64 Instruction

LD4R {Vt.16B - Vt4.16B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.16B → result.val[3]
+Vt3.16B → result.val[2]
+Vt2.16B → result.val[1]
+Vt.16B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure and Replicate to all lanes of four registers. This instruction loads a 4-element structure from memory and replicates the structure to all the lanes of the four SIMD&FP registers.

+

A64 Instruction

LD4R {Vt.4H - Vt4.4H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.4H → result.val[3]
+Vt3.4H → result.val[2]
+Vt2.4H → result.val[1]
+Vt.4H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure and Replicate to all lanes of four registers. This instruction loads a 4-element structure from memory and replicates the structure to all the lanes of the four SIMD&FP registers.

+

A64 Instruction

LD4R {Vt.8H - Vt4.8H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.8H → result.val[3]
+Vt3.8H → result.val[2]
+Vt2.8H → result.val[1]
+Vt.8H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure and Replicate to all lanes of four registers. This instruction loads a 4-element structure from memory and replicates the structure to all the lanes of the four SIMD&FP registers.

+

A64 Instruction

LD4R {Vt.1D - Vt4.1D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.1D → result.val[3]
+Vt3.1D → result.val[2]
+Vt2.1D → result.val[1]
+Vt.1D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
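
The 64-bit d-register form for comparison; the ACLE name vld4_dup_s64 is an assumed match for this entry (u64 is analogous) and the wrapper name is illustrative.

#include <arm_neon.h>
#include <stdint.h>

/* LD4R, 1D form: each .1D destination has a single lane, so each of the
   four vectors simply receives one element of the structure. */
int64x1x4_t load4_dup_s64(const int64_t *ptr)
{
    int64x1x4_t r = vld4_dup_s64(ptr);
    /* r.val[i] holds ptr[i]; with one lane per vector there is no
       visible replication. */
    return r;
}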

Description

Load single 4-element structure and Replicate to all lanes of four registers. This instruction loads a 4-element structure from memory and replicates the structure to all the lanes of the four SIMD&FP registers.

+

A64 Instruction

LD4R {Vt.1D - Vt4.1D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.1D → result.val[3]
+Vt3.1D → result.val[2]
+Vt2.1D → result.val[1]
+Vt.1D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure and Replicate to all lanes of four registers. This instruction loads a 4-element structure from memory and replicates the structure to all the lanes of the four SIMD&FP registers.

+

A64 Instruction

LD4R {Vt.1D - Vt4.1D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.1D → result.val[3]
+Vt3.1D → result.val[2]
+Vt2.1D → result.val[1]
+Vt.1D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A32/A64

Description

Load single 4-element structure and Replicate to all lanes of four registers. This instruction loads a 4-element structure from memory and replicates the structure to all the lanes of the four SIMD&FP registers.

+

A64 Instruction

LD4R {Vt.2D - Vt4.2D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.2D → result.val[3]
+Vt3.2D → result.val[2]
+Vt2.2D → result.val[1]
+Vt.2D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64
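
A sketch for the A64-only 2D form, assuming it corresponds to an ACLE intrinsic such as vld4q_dup_s64 (the u64/f64/p64 2D variants follow the same pattern); the wrapper name is illustrative.

#include <arm_neon.h>
#include <stdint.h>

/* LD4R, 2D form (A64 only): both 64-bit lanes of each destination
   vector are filled with one element of the 4-element structure. */
int64x2x4_t broadcast4_s64(const int64_t *ptr)
{
    int64x2x4_t r = vld4q_dup_s64(ptr);
    /* r.val[i] = { ptr[i], ptr[i] } */
    return r;
}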

Description

Load single 4-element structure and Replicate to all lanes of four registers. This instruction loads a 4-element structure from memory and replicates the structure to all the lanes of the four SIMD&FP registers.

+

A64 Instruction

LD4R {Vt.2D - Vt4.2D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.2D → result.val[3]
+Vt3.2D → result.val[2]
+Vt2.2D → result.val[1]
+Vt.2D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 4-element structure and Replicate to all lanes of four registers. This instruction loads a 4-element structure from memory and replicates the structure to all the lanes of the four SIMD&FP registers.

+

A64 Instruction

LD4R {Vt.2D - Vt4.2D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.2D → result.val[3]
+Vt3.2D → result.val[2]
+Vt2.2D → result.val[1]
+Vt.2D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 4-element structure and Replicate to all lanes of four registers. This instruction loads a 4-element structure from memory and replicates the structure to all the lanes of the four SIMD&FP registers.

+

A64 Instruction

LD4R {Vt.1D - Vt4.1D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.1D → result.val[3]
+Vt3.1D → result.val[2]
+Vt2.1D → result.val[1]
+Vt.1D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 4-element structure and Replicate to all lanes of four registers. This instruction loads a 4-element structure from memory and replicates the structure to all the lanes of the four SIMD&FP registers.

+

A64 Instruction

LD4R {Vt.2D - Vt4.2D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.2D → result.val[3]
+Vt3.2D → result.val[2]
+Vt2.2D → result.val[1]
+Vt.2D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.8B - Vt2.8B},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.8B
+val.val[0] → Vt.8B

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
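
For the store direction, a minimal sketch assuming this entry maps to an ACLE intrinsic of the vst2_u8 shape, which is the intrinsic the instruction form ST2 {Vt.8B - Vt2.8B},[Xn] is normally generated for; the wrapper name is illustrative.

#include <arm_neon.h>
#include <stdint.h>

/* ST2, 8B form: under the vst2_u8 assumption, the lanes of val.val[0]
   and val.val[1] are written to ptr interleaved:
   val.val[0][0], val.val[1][0], val.val[0][1], val.val[1][1], ... */
void store2_u8(uint8_t *ptr, uint8x8x2_t val)
{
    vst2_u8(ptr, val);
}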

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.16B - Vt2.16B},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.16B
+val.val[0] → Vt.16B

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
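
A slightly larger usage sketch for the 16B form, again assuming the vst2q_u8 and vld1q_u8 ACLE intrinsics and an illustrative function name: interleaving two separate 16-byte planes into one 32-byte buffer, which is the typical use of this store form.

#include <arm_neon.h>
#include <stdint.h>

/* Interleave 16 bytes from a with 16 bytes from b into dst as
   a0, b0, a1, b1, ..., a15, b15 (dst must have room for 32 bytes). */
void interleave16(uint8_t *dst, const uint8_t *a, const uint8_t *b)
{
    uint8x16x2_t v;
    v.val[0] = vld1q_u8(a);
    v.val[1] = vld1q_u8(b);
    vst2q_u8(dst, v);
}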

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.4H - Vt2.4H},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.4H
+val.val[0] → Vt.4H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.8H - Vt2.8H},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.8H
+val.val[0] → Vt.8H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.2S - Vt2.2S},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.2S
+val.val[0] → Vt.2S

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.4S - Vt2.4S},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.4S
+val.val[0] → Vt.4S

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.8B - Vt2.8B},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.8B
+val.val[0] → Vt.8B

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.16B - Vt2.16B},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.16B
+val.val[0] → Vt.16B

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.4H - Vt2.4H},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.4H
+val.val[0] → Vt.4H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+
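
For the 64-bit .4H form the matching ACLE shape is a uint16x4x2_t argument. A minimal sketch, assuming the intrinsic involved is vst2_u16 (an assumption, since the entry does not name it):

#include <arm_neon.h>
#include <stdint.h>

/* Interleave two 4-lane u16 vectors into 8 consecutive halfwords. */
void store2_u16x4(uint16_t out[8], uint16x4_t a, uint16x4_t b) {
    uint16x4x2_t pair = { { a, b } };   /* val[0] → Vt.4H, val[1] → Vt2.4H */
    vst2_u16(out, pair);
}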

Supported architectures

v7/A32/A64

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.8H - Vt2.8H},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.8H
+val.val[0] → Vt.8H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+
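
The 128-bit .8H variant takes a uint16x8x2_t; the sketch below assumes it is exposed as vst2q_u16 (the entry itself does not say which intrinsic it belongs to):

#include <arm_neon.h>
#include <stdint.h>

/* Interleave two 8-lane u16 vectors into 16 consecutive halfwords. */
void store2_u16x8(uint16_t out[16], uint16x8_t a, uint16x8_t b) {
    uint16x8x2_t pair;
    pair.val[0] = a;        /* Vt.8H  */
    pair.val[1] = b;        /* Vt2.8H */
    vst2q_u16(out, pair);   /* out[2*i] = a[i], out[2*i+1] = b[i], i = 0..7 */
}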

Supported architectures

v7/A32/A64

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.2S - Vt2.2S},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.2S
+val.val[0] → Vt.2S

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+
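
For the .2S form, a usage sketch that assumes vst2_u32 (not named in the entry) might look like:

#include <arm_neon.h>
#include <stdint.h>

/* Write the repeating pattern x, y, x, y with the two-register interleaved store. */
void store2_u32x2(uint32_t out[4], uint32_t x, uint32_t y) {
    uint32x2x2_t pair;
    pair.val[0] = vdup_n_u32(x);   /* Vt.2S  = {x, x} */
    pair.val[1] = vdup_n_u32(y);   /* Vt2.2S = {y, y} */
    vst2_u32(out, pair);           /* out = {x, y, x, y} */
}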

Supported architectures

v7/A32/A64

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.4S - Vt2.4S},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.4S
+val.val[0] → Vt.4S

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+
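
Assuming the .4S entry corresponds to vst2q_u32 (an assumption based purely on the operand shapes), a previously de-interleaved pair of vectors can be written back as interleaved pairs like this:

#include <arm_neon.h>
#include <stdint.h>

/* Re-interleave separate "even" and "odd" streams into out[0..7]. */
void interleave_u32x4(uint32_t out[8], uint32x4_t even, uint32x4_t odd) {
    uint32x4x2_t pair;
    pair.val[0] = even;    /* Vt.4S  */
    pair.val[1] = odd;     /* Vt2.4S */
    vst2q_u32(out, pair);  /* out[2*i] = even[i], out[2*i+1] = odd[i] */
}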

Supported architectures

v7/A32/A64

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.4H - Vt2.4H},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.4H
+val.val[0] → Vt.4H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.8H - Vt2.8H},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.8H
+val.val[0] → Vt.8H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.2S - Vt2.2S},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.2S
+val.val[0] → Vt.2S

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.4S - Vt2.4S},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.4S
+val.val[0] → Vt.4S

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.8B - Vt2.8B},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.8B
+val.val[0] → Vt.8B

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+
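
The 64-bit .8B shape matches a uint8x8x2_t argument; a short sketch, assuming vst2_u8:

#include <arm_neon.h>
#include <stdint.h>

/* Interleave two 8-byte vectors into a 16-byte buffer. */
void store2_u8x8(uint8_t out[16], uint8x8_t a, uint8x8_t b) {
    uint8x8x2_t pair = { { a, b } };   /* val[0] → Vt.8B, val[1] → Vt2.8B */
    vst2_u8(out, pair);   /* out = a[0], b[0], a[1], b[1], ..., a[7], b[7] */
}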

Supported architectures

v7/A32/A64

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.16B - Vt2.16B},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.16B
+val.val[0] → Vt.16B

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.4H - Vt2.4H},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.4H
+val.val[0] → Vt.4H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.8H - Vt2.8H},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.8H
+val.val[0] → Vt.8H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.1D - Vt2.1D},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.1D
+val.val[0] → Vt.1D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+
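
With only one 64-bit lane per register there is nothing to interleave, which is presumably why this entry encodes the two-register store as ST1 {Vt.1D - Vt2.1D} rather than ST2. The sketch below assumes the corresponding ACLE intrinsic is vst2_u64; that mapping is inferred from the operand shapes, not stated by the entry.

#include <arm_neon.h>
#include <stdint.h>

/* Store two single-lane u64 vectors to out[0] and out[1]. */
void store2_u64x1(uint64_t out[2], uint64_t x, uint64_t y) {
    uint64x1x2_t pair;
    pair.val[0] = vdup_n_u64(x);   /* Vt.1D  */
    pair.val[1] = vdup_n_u64(y);   /* Vt2.1D */
    vst2_u64(out, pair);           /* out[0] = x, out[1] = y */
}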

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.1D - Vt2.1D},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.1D
+val.val[0] → Vt.1D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.1D - Vt2.1D},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.1D
+val.val[0] → Vt.1D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A32/A64

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.2D - Vt2.2D},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.2D
+val.val[0] → Vt.2D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+
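
The .2D form is the 128-bit, 64-bit-element variant available on AArch64; assuming it is exposed as vst2q_u64 (the entry does not name the intrinsic), a sketch:

#include <arm_neon.h>
#include <stdint.h>

/* Interleave two 2-lane u64 vectors into four consecutive doublewords. */
void store2_u64x2(uint64_t out[4], uint64x2_t a, uint64x2_t b) {
    uint64x2x2_t pair;
    pair.val[0] = a;       /* Vt.2D  */
    pair.val[1] = b;       /* Vt2.2D */
    vst2q_u64(out, pair);  /* out = a[0], b[0], a[1], b[1] */
}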

Supported architectures

A64

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.2D - Vt2.2D},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.2D
+val.val[0] → Vt.2D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.2D - Vt2.2D},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.2D
+val.val[0] → Vt.2D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.1D - Vt2.1D},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.1D
+val.val[0] → Vt.1D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.2D - Vt2.2D},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.2D
+val.val[0] → Vt.2D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.8B - Vt3.8B},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.8B
+val.val[1] → Vt2.8B
+val.val[0] → Vt.8B

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+
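
The three-register entries follow the same pattern with a val[0..2] structure. As an illustration, assuming the .8B entry corresponds to vst3_u8 (an assumption), packing three separate byte planes into interleaved triples looks like:

#include <arm_neon.h>
#include <stdint.h>

/* Interleave three 8-byte planes (e.g. R, G, B) into 24 bytes of R,G,B,R,G,B,... */
void store3_u8x8(uint8_t out[24], uint8x8_t r, uint8x8_t g, uint8x8_t b) {
    uint8x8x3_t rgb;
    rgb.val[0] = r;   /* Vt.8B  */
    rgb.val[1] = g;   /* Vt2.8B */
    rgb.val[2] = b;   /* Vt3.8B */
    vst3_u8(out, rgb);
}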

Supported architectures

v7/A32/A64

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.16B - Vt3.16B},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.16B
+val.val[1] → Vt2.16B
+val.val[0] → Vt.16B

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.4H - Vt3.4H},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.4H
+val.val[1] → Vt2.4H
+val.val[0] → Vt.4H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.8H - Vt3.8H},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.8H
+val.val[1] → Vt2.8H
+val.val[0] → Vt.8H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.2S - Vt3.2S},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.2S
+val.val[1] → Vt2.2S
+val.val[0] → Vt.2S

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.4S - Vt3.4S},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.4S
+val.val[1] → Vt2.4S
+val.val[0] → Vt.4S

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+
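
Likewise for the 128-bit .4S three-register form; this sketch assumes vst3q_u32, which the entry itself does not name:

#include <arm_neon.h>
#include <stdint.h>

/* Interleave three 4-lane u32 vectors into twelve consecutive words. */
void store3_u32x4(uint32_t out[12], uint32x4_t a, uint32x4_t b, uint32x4_t c) {
    uint32x4x3_t triple;
    triple.val[0] = a;       /* Vt.4S  */
    triple.val[1] = b;       /* Vt2.4S */
    triple.val[2] = c;       /* Vt3.4S */
    vst3q_u32(out, triple);  /* out[3*i] = a[i], out[3*i+1] = b[i], out[3*i+2] = c[i] */
}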

Supported architectures

v7/A32/A64

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.8B - Vt3.8B},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.8B
+val.val[1] → Vt2.8B
+val.val[0] → Vt.8B

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.16B - Vt3.16B},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.16B
+val.val[1] → Vt2.16B
+val.val[0] → Vt.16B

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.4H - Vt3.4H},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.4H
+val.val[1] → Vt2.4H
+val.val[0] → Vt.4H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.8H - Vt3.8H},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.8H
+val.val[1] → Vt2.8H
+val.val[0] → Vt.8H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.2S - Vt3.2S},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.2S
+val.val[1] → Vt2.2S
+val.val[0] → Vt.2S

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.4S - Vt3.4S},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.4S
+val.val[1] → Vt2.4S
+val.val[0] → Vt.4S

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.4H - Vt3.4H},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.4H
+val.val[1] → Vt2.4H
+val.val[0] → Vt.4H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.8H - Vt3.8H},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.8H
+val.val[1] → Vt2.8H
+val.val[0] → Vt.8H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+
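
For readers not used to ARM pseudocode, the MemOp_STORE branch of the Operation block above can be paraphrased in plain Rust. This is an illustrative scalar model only (the function name is made up); it fixes esize to 16 bits and uses the slice length in place of the MOD 32 register wrap:

// Scalar paraphrase of the "one element per register" store path:
// write lane `index` of each of `selem` consecutive registers to memory.
fn store_one_lane(regs: &[[u16; 8]], t0: usize, index: usize, selem: usize) -> Vec<u16> {
    let mut out = Vec::with_capacity(selem);
    let mut t = t0;
    for _s in 0..selem {
        out.push(regs[t][index]); // Mem[address+offs, ebytes] = Elem[V[t], index, esize]
        t = (t + 1) % regs.len(); // t = (t + 1) MOD 32
    }
    out // offs advances by ebytes per stored element
}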

Supported architectures

v7/A32/A64

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.2S - Vt3.2S},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.2S
+val.val[1] → Vt2.2S
+val.val[0] → Vt.2S

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.4S - Vt3.4S},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.4S
+val.val[1] → Vt2.4S
+val.val[0] → Vt.4S

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.8B - Vt3.8B},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.8B
+val.val[1] → Vt2.8B
+val.val[0] → Vt.8B

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.16B - Vt3.16B},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.16B
+val.val[1] → Vt2.16B
+val.val[0] → Vt.16B

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.4H - Vt3.4H},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.4H
+val.val[1] → Vt2.4H
+val.val[0] → Vt.4H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.8H - Vt3.8H},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.8H
+val.val[1] → Vt2.8H
+val.val[0] → Vt.8H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.1D - Vt3.1D},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.1D
+val.val[1] → Vt2.1D
+val.val[0] → Vt.1D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+
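
Illustrative note, not from the ARM text: with 64-bit lanes each d register holds a single element, so the 3-element store above degenerates to a plain ST1 of Vt.1D, Vt2.1D and Vt3.1D with no interleaving to do. A minimal Rust sketch, assuming this entry corresponds to one of the 64-bit vst3 intrinsics (for example vst3_u64; the table does not say which, and the function name below is made up):

#[cfg(target_arch = "aarch64")]
fn demo_vst3_1d(out: &mut [u64; 3]) {
    use core::arch::aarch64::{uint64x1x3_t, vdup_n_u64, vst3_u64};
    // val.val[0] → Vt.1D, val.val[1] → Vt2.1D, val.val[2] → Vt3.1D
    let val = uint64x1x3_t(vdup_n_u64(1), vdup_n_u64(2), vdup_n_u64(3));
    // ptr → Xn; writes the three elements contiguously: 1, 2, 3
    unsafe { vst3_u64(out.as_mut_ptr(), val) };
}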

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.1D - Vt3.1D},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.1D
+val.val[1] → Vt2.1D
+val.val[0] → Vt.1D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.1D - Vt3.1D},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.1D
+val.val[1] → Vt2.1D
+val.val[0] → Vt.1D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A32/A64

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.2D - Vt3.2D},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.2D
+val.val[1] → Vt2.2D
+val.val[0] → Vt.2D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.2D - Vt3.2D},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.2D
+val.val[1] → Vt2.2D
+val.val[0] → Vt.2D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+
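
Illustrative sketch, not part of the table: the 2D forms here are listed as A64-only, and in Rust they would be reached through the q-register vst3 intrinsics. Assuming this entry corresponds to something like vst3q_u64 (an assumption; the table gives only the instruction form):

#[cfg(target_arch = "aarch64")]
fn demo_vst3q_2d(out: &mut [u64; 6]) {
    use core::arch::aarch64::{uint64x2x3_t, vdupq_n_u64, vst3q_u64};
    // val.val[0] → Vt.2D, val.val[1] → Vt2.2D, val.val[2] → Vt3.2D
    let val = uint64x2x3_t(vdupq_n_u64(1), vdupq_n_u64(2), vdupq_n_u64(3));
    // ptr → Xn; writes six elements, interleaved as 1,2,3,1,2,3
    unsafe { vst3q_u64(out.as_mut_ptr(), val) };
}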

Supported architectures

A64

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.2D - Vt3.2D},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.2D
+val.val[1] → Vt2.2D
+val.val[0] → Vt.2D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.1D - Vt3.1D},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.1D
+val.val[1] → Vt2.1D
+val.val[0] → Vt.1D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.2D - Vt3.2D},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.2D
+val.val[1] → Vt2.2D
+val.val[0] → Vt.2D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.8B - Vt4.8B},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.8B
+val.val[2] → Vt3.8B
+val.val[1] → Vt2.8B
+val.val[0] → Vt.8B

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+
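
For comparison with the ST3 sketches earlier, here is a minimal Rust sketch of a 4-register store, assuming the entry above maps to a vst4-style byte intrinsic such as vst4_u8 (the intrinsic name and the demo function are assumptions, not taken from the table):

#[cfg(target_arch = "aarch64")]
fn demo_vst4_8b(out: &mut [u8; 32]) {
    use core::arch::aarch64::{uint8x8x4_t, vdup_n_u8, vst4_u8};
    // val.val[0] → Vt.8B, val.val[1] → Vt2.8B, val.val[2] → Vt3.8B, val.val[3] → Vt4.8B
    let val = uint8x8x4_t(vdup_n_u8(1), vdup_n_u8(2), vdup_n_u8(3), vdup_n_u8(4));
    // ptr → Xn; writes 32 bytes, interleaved as 1,2,3,4,1,2,3,4,...
    unsafe { vst4_u8(out.as_mut_ptr(), val) };
}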

Supported architectures

v7/A32/A64

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.16B - Vt4.16B},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.16B
+val.val[2] → Vt3.16B
+val.val[1] → Vt2.16B
+val.val[0] → Vt.16B

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.4H - Vt4.4H},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.4H
+val.val[2] → Vt3.4H
+val.val[1] → Vt2.4H
+val.val[0] → Vt.4H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.8H - Vt4.8H},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.8H
+val.val[2] → Vt3.8H
+val.val[1] → Vt2.8H
+val.val[0] → Vt.8H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.2S - Vt4.2S},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.2S
+val.val[2] → Vt3.2S
+val.val[1] → Vt2.2S
+val.val[0] → Vt.2S

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.4S - Vt4.4S},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.4S
+val.val[2] → Vt3.4S
+val.val[1] → Vt2.4S
+val.val[0] → Vt.4S

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+
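
One more hedged example, assuming the 4S entry above corresponds to a float intrinsic such as vst4q_f32 (again an assumption; the table gives only the instruction form). Interleaving four q registers like this is a common way to turn planar data, for example separate R, G, B and A planes, into an interleaved layout:

#[cfg(target_arch = "aarch64")]
fn demo_vst4q_4s(out: &mut [f32; 16]) {
    use core::arch::aarch64::{float32x4x4_t, vdupq_n_f32, vst4q_f32};
    // val.val[0] → Vt.4S, val.val[1] → Vt2.4S, val.val[2] → Vt3.4S, val.val[3] → Vt4.4S
    let val = float32x4x4_t(
        vdupq_n_f32(0.1),
        vdupq_n_f32(0.2),
        vdupq_n_f32(0.3),
        vdupq_n_f32(0.4),
    );
    // ptr → Xn; writes 16 floats, interleaved as 0.1, 0.2, 0.3, 0.4, 0.1, ...
    unsafe { vst4q_f32(out.as_mut_ptr(), val) };
}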

Supported architectures

v7/A32/A64

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.8B - Vt4.8B},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.8B
+val.val[2] → Vt3.8B
+val.val[1] → Vt2.8B
+val.val[0] → Vt.8B

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.16B - Vt4.16B},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.16B
+val.val[2] → Vt3.16B
+val.val[1] → Vt2.16B
+val.val[0] → Vt.16B

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.4H - Vt4.4H},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.4H
+val.val[2] → Vt3.4H
+val.val[1] → Vt2.4H
+val.val[0] → Vt.4H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.8H - Vt4.8H},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.8H
+val.val[2] → Vt3.8H
+val.val[1] → Vt2.8H
+val.val[0] → Vt.8H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.2S - Vt4.2S},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.2S
+val.val[2] → Vt3.2S
+val.val[1] → Vt2.2S
+val.val[0] → Vt.2S

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.4S - Vt4.4S},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.4S
+val.val[2] → Vt3.4S
+val.val[1] → Vt2.4S
+val.val[0] → Vt.4S

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.4H - Vt4.4H},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.4H
+val.val[2] → Vt3.4H
+val.val[1] → Vt2.4H
+val.val[0] → Vt.4H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.8H - Vt4.8H},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.8H
+val.val[2] → Vt3.8H
+val.val[1] → Vt2.8H
+val.val[0] → Vt.8H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.2S - Vt4.2S},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.2S
+val.val[2] → Vt3.2S
+val.val[1] → Vt2.2S
+val.val[0] → Vt.2S

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.4S - Vt4.4S},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.4S
+val.val[2] → Vt3.4S
+val.val[1] → Vt2.4S
+val.val[0] → Vt.4S

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.8B - Vt4.8B},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.8B
+val.val[2] → Vt3.8B
+val.val[1] → Vt2.8B
+val.val[0] → Vt.8B

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.16B - Vt4.16B},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.16B
+val.val[2] → Vt3.16B
+val.val[1] → Vt2.16B
+val.val[0] → Vt.16B

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.4H - Vt4.4H},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.4H
+val.val[2] → Vt3.4H
+val.val[1] → Vt2.4H
+val.val[0] → Vt.4H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.8H - Vt4.8H},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.8H
+val.val[2] → Vt3.8H
+val.val[1] → Vt2.8H
+val.val[0] → Vt.8H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.1D - Vt4.1D},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.1D
+val.val[2] → Vt3.1D
+val.val[1] → Vt2.1D
+val.val[0] → Vt.1D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
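
As a hedged C sketch: the 64-bit element variant of this store has no lane interleaving to perform (each 1D register holds exactly one element), which is why the listing above shows ST1 over four 1D registers rather than an ST4 arrangement. The entry does not name the intrinsic; vst4_u64 is assumed here.

#include <arm_neon.h>
#include <stdint.h>

void store_four_u64(uint64_t *ptr, uint64x1x4_t val) {
    /* Writes val.val[0..3] to ptr[0..3]; with one element per register
       there is nothing to interleave, so ST1 {Vt.1D - Vt4.1D} suffices. */
    vst4_u64(ptr, val);
}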

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.1D - Vt4.1D},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.1D
+val.val[2] → Vt3.1D
+val.val[1] → Vt2.1D
+val.val[0] → Vt.1D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.1D - Vt4.1D},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.1D
+val.val[2] → Vt3.1D
+val.val[1] → Vt2.1D
+val.val[0] → Vt.1D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A32/A64

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.2D - Vt4.2D},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.2D
+val.val[2] → Vt3.2D
+val.val[1] → Vt2.2D
+val.val[0] → Vt.2D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64
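
A minimal C sketch for the q-form, 64-bit variant, which is listed here as A64 only. The intrinsic name is not stated in this entry; vst4q_u64 is assumed from the operand shapes (four 2D registers and a pointer).

#include <arm_neon.h>
#include <stdint.h>

#if defined(__aarch64__)
/* Interleaves the four 2D registers and writes 64 bytes starting at ptr. */
void store_four_u64x2(uint64_t *ptr, uint64x2x4_t val) {
    vst4q_u64(ptr, val);
}
#endif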

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.2D - Vt4.2D},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.2D
+val.val[2] → Vt3.2D
+val.val[1] → Vt2.2D
+val.val[0] → Vt.2D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.2D - Vt4.2D},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.2D
+val.val[2] → Vt3.2D
+val.val[1] → Vt2.2D
+val.val[0] → Vt.2D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.1D - Vt4.1D},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.1D
+val.val[2] → Vt3.1D
+val.val[1] → Vt2.1D
+val.val[0] → Vt.1D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.2D - Vt4.2D},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.2D
+val.val[2] → Vt3.2D
+val.val[1] → Vt2.2D
+val.val[0] → Vt.2D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD2 {Vt.h - Vt2.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[1] → Vt2.4H
+src.val[0] → Vt.4H
+0 <= lane <= 3

Results

Vt2.4H → result.val[1]
+Vt.4H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
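
For illustration, a minimal C sketch of the 4H lane form, assuming it corresponds to the vld2_lane_u16 intrinsic (the entry does not name the intrinsic, and the same instruction also backs the s16/p16/f16 variants).

#include <arm_neon.h>
#include <stdint.h>

/* Loads two consecutive u16 values from ptr and inserts them into lane 2 of
   src.val[0] and src.val[1]; all other lanes are passed through unchanged.
   The lane index must be a constant in the range 0..3 for the 4H form. */
uint16x4x2_t load_pair_into_lane2(const uint16_t *ptr, uint16x4x2_t src) {
    return vld2_lane_u16(ptr, src, 2);
}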

Description

Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD2 {Vt.h - Vt2.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[1] → Vt2.8H
+src.val[0] → Vt.8H
+0 <= lane <= 7

Results

Vt2.8H → result.val[1]
+Vt.8H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD2 {Vt.s - Vt2.s}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[1] → Vt2.2S
+src.val[0] → Vt.2S
+0 <= lane <= 1

Results

Vt2.2S → result.val[1]
+Vt.2S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
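
A corresponding C sketch for the 2S form, assuming the vld2_lane_u32 intrinsic (again an assumption; the f32 variant maps to the same instruction).

#include <arm_neon.h>
#include <stdint.h>

/* Loads a 2-element structure (two u32) from ptr into lane 0 of both halves
   of the pair; the lane index must be 0 or 1 for the 2S arrangement. */
uint32x2x2_t load_pair_into_lane0(const uint32_t *ptr, uint32x2x2_t src) {
    return vld2_lane_u32(ptr, src, 0);
}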

Description

Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD2 {Vt.s - Vt2.s}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[1] → Vt2.4S
+src.val[0] → Vt.4S
+0 <= lane <= 3

Results

Vt2.4S → result.val[1]
+Vt.4S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD2 {Vt.h - Vt2.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[1] → Vt2.4H
+src.val[0] → Vt.4H
+0 <= lane <= 3

Results

Vt2.4H → result.val[1]
+Vt.4H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD2 {Vt.h - Vt2.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[1] → Vt2.8H
+src.val[0] → Vt.8H
+0 <= lane <= 7

Results

Vt2.8H → result.val[1]
+Vt.8H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD2 {Vt.s - Vt2.s}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[1] → Vt2.2S
+src.val[0] → Vt.2S
+0 <= lane <= 1

Results

Vt2.2S → result.val[1]
+Vt.2S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD2 {Vt.s - Vt2.s}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[1] → Vt2.4S
+src.val[0] → Vt.4S
+0 <= lane <= 3

Results

Vt2.4S → result.val[1]
+Vt.4S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD2 {Vt.h - Vt2.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[1] → Vt2.4H
+src.val[0] → Vt.4H
+0 <= lane <= 3

Results

Vt2.4H → result.val[1]
+Vt.4H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD2 {Vt.h - Vt2.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[1] → Vt2.8H
+src.val[0] → Vt.8H
+0 <= lane <= 7

Results

Vt2.8H → result.val[1]
+Vt.8H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD2 {Vt.s - Vt2.s}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[1] → Vt2.2S
+src.val[0] → Vt.2S
+0 <= lane <= 1

Results

Vt2.2S → result.val[1]
+Vt.2S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD2 {Vt.s - Vt2.s}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[1] → Vt2.4S
+src.val[0] → Vt.4S
+0 <= lane <= 3

Results

Vt2.4S → result.val[1]
+Vt.4S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD2 {Vt.h - Vt2.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[1] → Vt2.4H
+src.val[0] → Vt.4H
+0 <= lane <= 3

Results

Vt2.4H → result.val[1]
+Vt.4H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD2 {Vt.h - Vt2.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[1] → Vt2.8H
+src.val[0] → Vt.8H
+0 <= lane <= 7

Results

Vt2.8H → result.val[1]
+Vt.8H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD2 {Vt.b - Vt2.b}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[1] → Vt2.8B
+src.val[0] → Vt.8B
+0 <= lane <= 7

Results

Vt2.8B → result.val[1]
+Vt.8B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
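
A runnable C sketch of the byte-lane form, assuming it corresponds to vld2_lane_u8 (the entry does not name the intrinsic; the s8/p8 variants use the same instruction).

#include <arm_neon.h>
#include <stdint.h>
#include <stdio.h>

int main(void) {
    uint8_t mem[2] = { 0xAA, 0xBB };
    uint8x8x2_t v;
    v.val[0] = vdup_n_u8(0);
    v.val[1] = vdup_n_u8(0);

    /* Inserts mem[0] into lane 5 of v.val[0] and mem[1] into lane 5 of
       v.val[1]; the remaining lanes keep their previous contents. */
    v = vld2_lane_u8(mem, v, 5);

    printf("%02x %02x\n", vget_lane_u8(v.val[0], 5), vget_lane_u8(v.val[1], 5));
    return 0;
}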

Description

Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD2 {Vt.b - Vt2.b}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[1] → Vt2.8B
+src.val[0] → Vt.8B
+0 <= lane <= 7

Results

Vt2.8B → result.val[1]
+Vt.8B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD2 {Vt.b - Vt2.b}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[1] → Vt2.8B
+src.val[0] → Vt.8B
+0 <= lane <= 7

Results

Vt2.8B → result.val[1]
+Vt.8B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD2 {Vt.b - Vt2.b}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[1] → Vt2.16B
+src.val[0] → Vt.16B
+0 <= lane <= 15

Results

Vt2.16B → result.val[1]
+Vt.16B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64
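
A C sketch of the A64-only 16B form, assuming the vld2q_lane_u8 intrinsic (not named in the entry). On the q registers the byte lane index may range over 0..15.

#include <arm_neon.h>
#include <stdint.h>

#if defined(__aarch64__)
/* Loads two bytes from ptr into lane 15 of src.val[0] and src.val[1]. */
uint8x16x2_t load_pair_into_lane15(const uint8_t *ptr, uint8x16x2_t src) {
    return vld2q_lane_u8(ptr, src, 15);
}
#endif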

Description

Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD2 {Vt.b - Vt2.b}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[1] → Vt2.16B
+src.val[0] → Vt.16B
+0 <= lane <= 15

Results

Vt2.16B → result.val[1]
+Vt.16B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD2 {Vt.b - Vt2.b}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[1] → Vt2.16B
+src.val[0] → Vt.16B
+0 <= lane <= 15

Results

Vt2.16B → result.val[1]
+Vt.16B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD2 {Vt.d - Vt2.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[1] → Vt2.1D
+src.val[0] → Vt.1D
+0 <= lane <= 0

Results

ptr → Xn
+Vt2.1D → result.val[1]
+Vt.1D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64
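
A C sketch of the 1D lane form, assuming the vld2_lane_u64 intrinsic (an assumption; the s64/p64/f64 variants map to the same instruction). With 1D operands each register holds a single element, so the only valid lane index is 0.

#include <arm_neon.h>
#include <stdint.h>

#if defined(__aarch64__)
/* Still performs the interleaved 2-element load: ptr[0] goes to src.val[0]
   and ptr[1] to src.val[1], each replacing the register's only lane. */
uint64x1x2_t load_u64_pair(const uint64_t *ptr, uint64x1x2_t src) {
    return vld2_lane_u64(ptr, src, 0);
}
#endif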

Description

Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD2 {Vt.d - Vt2.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[1] → Vt2.2D
+src.val[0] → Vt.2D
+0 <= lane <= 1

Results

ptr → Xn
+Vt2.2D → result.val[1]
+Vt.2D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD2 {Vt.d - Vt2.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[1] → Vt2.1D
+src.val[0] → Vt.1D
+0 <= lane <= 0

Results

Vt2.1D → result.val[1]
+Vt.1D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD2 {Vt.d - Vt2.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[1] → Vt2.2D
+src.val[0] → Vt.2D
+0 <= lane <= 1

Results

Vt2.2D → result.val[1]
+Vt.2D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD2 {Vt.d - Vt2.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[1] → Vt2.1D
+src.val[0] → Vt.1D
+0 <= lane <= 0

Results

Vt2.1D → result.val[1]
+Vt.1D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD2 {Vt.d - Vt2.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[1] → Vt2.2D
+src.val[0] → Vt.2D
+0 <= lane <= 1

Results

Vt2.2D → result.val[1]
+Vt.2D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD2 {Vt.d - Vt2.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[1] → Vt2.1D
+src.val[0] → Vt.1D
+0 <= lane <= 0

Results

Vt2.1D → result.val[1]
+Vt.1D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD2 {Vt.d - Vt2.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[1] → Vt2.2D
+src.val[0] → Vt.2D
+0 <= lane <= 1

Results

Vt2.2D → result.val[1]
+Vt.2D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.h - Vt3.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[2] → Vt3.4H
+src.val[1] → Vt2.4H
+src.val[0] → Vt.4H
+0 <= lane <= 3

Results

Vt3.4H → result.val[2]
+Vt2.4H → result.val[1]
+Vt.4H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
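
For the LD3 lane forms that follow, the same Operation pseudocode runs with selem = 3, and the post-indexed variants additionally perform the writeback step at the end. A hedged generalisation of the sketch above, covering an arbitrary number of structure registers and the immediate post-index writeback (ldn_lane_model is an illustrative name, not a real intrinsic):

// Generalised model of the non-replicating Operation: load SELEM
// consecutive elements (one structure) and insert each into `lane` of
// the matching register; optionally post-index the base address.
fn ldn_lane_model<T: Copy, const LANES: usize, const SELEM: usize>(
    mem: &[T],                       // elements at the base address
    regs: &mut [[T; LANES]; SELEM],  // Vt .. V(t+SELEM-1)
    lane: usize,                     // must satisfy lane < LANES
    base: &mut usize,                // models Xn (or SP when n == 31)
    wback: bool,                     // post-index form?
) {
    let ebytes = core::mem::size_of::<T>();
    let mut offs = 0;
    for s in 0..SELEM {
        regs[s][lane] = mem[s];      // insert into one lane of V[t+s]
        offs += ebytes;
    }
    // Immediate post-index writeback; the register form (m != 31) uses
    // the value of Xm here instead of the accumulated offset.
    if wback {
        *base += offs;
    }
}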

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.h - Vt3.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[2] → Vt3.8H
+src.val[1] → Vt2.8H
+src.val[0] → Vt.8H
+0 <= lane <= 7

Results

Vt3.8H → result.val[2]
+Vt2.8H → result.val[1]
+Vt.8H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
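
Every Operation block in this listing also carries an "if replicate then" branch. That branch is not exercised by the lane forms documented here; it belongs to the load-and-replicate encodings (LD2R/LD3R), which broadcast each loaded element to every lane of its register. For contrast with the lane sketches above, a minimal model of that branch (again an illustrative name only):

// Model of the replicate branch: each loaded element fills every lane
// of its register (V[t] = Replicate(element, datasize DIV esize)).
fn ldn_replicate_model<T: Copy, const LANES: usize, const SELEM: usize>(
    mem: &[T],
    regs: &mut [[T; LANES]; SELEM],
) {
    for s in 0..SELEM {
        regs[s] = [mem[s]; LANES];
    }
}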

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.s - Vt3.s}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[2] → Vt3.2S
+src.val[1] → Vt2.2S
+src.val[0] → Vt.2S
+0 <= lane <= 1

Results

Vt3.2S → result.val[2]
+Vt2.2S → result.val[1]
+Vt.2S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.s - Vt3.s}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[2] → Vt3.4S
+src.val[1] → Vt2.4S
+src.val[0] → Vt.4S
+0 <= lane <= 3

Results

Vt3.4S → result.val[2]
+Vt2.4S → result.val[1]
+Vt.4S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
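
The lane bound quoted in each Argument Preparation block is simply (register width in bits) / (element size in bits) - 1: for the 4S form above that is 128/32 - 1 = 3, for the 8H forms 128/16 - 1 = 7, for the 16B forms 128/8 - 1 = 15, and for the 1D forms 64/64 - 1 = 0, which is why their only legal lane is 0.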

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.h - Vt3.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[2] → Vt3.4H
+src.val[1] → Vt2.4H
+src.val[0] → Vt.4H
+0 <= lane <= 3

Results

Vt3.4H → result.val[2]
+Vt2.4H → result.val[1]
+Vt.4H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.h - Vt3.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[2] → Vt3.8H
+src.val[1] → Vt2.8H
+src.val[0] → Vt.8H
+0 <= lane <= 7

Results

Vt3.8H → result.val[2]
+Vt2.8H → result.val[1]
+Vt.8H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.s - Vt3.s}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[2] → Vt3.2S
+src.val[1] → Vt2.2S
+src.val[0] → Vt.2S
+0 <= lane <= 1

Results

Vt3.2S → result.val[2]
+Vt2.2S → result.val[1]
+Vt.2S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.s - Vt3.s}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[2] → Vt3.4S
+src.val[1] → Vt2.4S
+src.val[0] → Vt.4S
+0 <= lane <= 3

Results

Vt3.4S → result.val[2]
+Vt2.4S → result.val[1]
+Vt.4S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.h - Vt3.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[2] → Vt3.4H
+src.val[1] → Vt2.4H
+src.val[0] → Vt.4H
+0 <= lane <= 3

Results

Vt3.4H → result.val[2]
+Vt2.4H → result.val[1]
+Vt.4H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.h - Vt3.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[2] → Vt3.8H
+src.val[1] → Vt2.8H
+src.val[0] → Vt.8H
+0 <= lane <= 7

Results

Vt3.8H → result.val[2]
+Vt2.8H → result.val[1]
+Vt.8H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.s - Vt3.s}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[2] → Vt3.2S
+src.val[1] → Vt2.2S
+src.val[0] → Vt.2S
+0 <= lane <= 1

Results

Vt3.2S → result.val[2]
+Vt2.2S → result.val[1]
+Vt.2S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.s - Vt3.s}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[2] → Vt3.4S
+src.val[1] → Vt2.4S
+src.val[0] → Vt.4S
+0 <= lane <= 3

Results

Vt3.4S → result.val[2]
+Vt2.4S → result.val[1]
+Vt.4S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.h - Vt3.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[2] → Vt3.4H
+src.val[1] → Vt2.4H
+src.val[0] → Vt.4H
+0 <= lane <= 3

Results

Vt3.4H → result.val[2]
+Vt2.4H → result.val[1]
+Vt.4H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.h - Vt3.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[2] → Vt3.8H
+src.val[1] → Vt2.8H
+src.val[0] → Vt.8H
+0 <= lane <= 7

Results

Vt3.8H → result.val[2]
+Vt2.8H → result.val[1]
+Vt.8H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.b - Vt3.b}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[2] → Vt3.8B
+src.val[1] → Vt2.8B
+src.val[0] → Vt.8B
+0 <= lane <= 7

Results

Vt3.8B → result.val[2]
+Vt2.8B → result.val[1]
+Vt.8B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.b - Vt3.b}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[2] → Vt3.8B
+src.val[1] → Vt2.8B
+src.val[0] → Vt.8B
+0 <= lane <= 7

Results

Vt3.8B → result.val[2]
+Vt2.8B → result.val[1]
+Vt.8B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.b - Vt3.b}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[2] → Vt3.8B
+src.val[1] → Vt2.8B
+src.val[0] → Vt.8B
+0 <= lane <= 7

Results

Vt3.8B → result.val[2]
+Vt2.8B → result.val[1]
+Vt.8B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.b - Vt3.b}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[2] → Vt3.16B
+src.val[1] → Vt2.16B
+src.val[0] → Vt.16B
+0 <= lane <= 15

Results

Vt3.16B → result.val[2]
+Vt2.16B → result.val[1]
+Vt.16B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.b - Vt3.b}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[2] → Vt3.16B
+src.val[1] → Vt2.16B
+src.val[0] → Vt.16B
+0 <= lane <= 15

Results

Vt3.16B → result.val[2]
+Vt2.16B → result.val[1]
+Vt.16B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.b - Vt3.b}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[2] → Vt3.16B
+src.val[1] → Vt2.16B
+src.val[0] → Vt.16B
+0 <= lane <= 15

Results

Vt3.16B → result.val[2]
+Vt2.16B → result.val[1]
+Vt.16B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.d - Vt3.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[2] → Vt3.1D
+src.val[1] → Vt2.1D
+src.val[0] → Vt.1D
+0 <= lane <= 0

Results

Vt3.1D → result.val[2]
+Vt2.1D → result.val[1]
+Vt.1D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.d - Vt3.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[2] → Vt3.2D
+src.val[1] → Vt2.2D
+src.val[0] → Vt.2D
+0 <= lane <= 1

Results

Vt3.2D → result.val[2]
+Vt2.2D → result.val[1]
+Vt.2D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.d - Vt3.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[2] → Vt3.1D
+src.val[1] → Vt2.1D
+src.val[0] → Vt.1D
+0 <= lane <= 0

Results

Vt3.1D → result.val[2]
+Vt2.1D → result.val[1]
+Vt.1D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.d - Vt3.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[2] → Vt3.2D
+src.val[1] → Vt2.2D
+src.val[0] → Vt.2D
+0 <= lane <= 1

Results

Vt3.2D → result.val[2]
+Vt2.2D → result.val[1]
+Vt.2D → result.val[0]
+
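Likewise for the .2D (quad-register) form, a hedged C sketch assuming vld3q_lane_u64; the element type is again an assumption.

#include <arm_neon.h>

/* LD3 {Vt.2D - Vt3.2D}[1]: reads three consecutive uint64_t values from ptr
 * into lane 1 of src.val[0..2]; lane 0 of each register is unchanged. */
uint64x2x3_t ld3q_lane1_u64(const uint64_t *ptr, uint64x2x3_t src) {
    return vld3q_lane_u64(ptr, src, 1);  /* lane is a constant in 0..1 */
}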

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.d - Vt3.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[2] → Vt3.1D
+src.val[1] → Vt2.1D
+src.val[0] → Vt.1D
+0 <= lane <= 0

Results

Vt3.1D → result.val[2]
+Vt2.1D → result.val[1]
+Vt.1D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.d - Vt3.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[2] → Vt3.2D
+src.val[1] → Vt2.2D
+src.val[0] → Vt.2D
+0 <= lane <= 1

Results

Vt3.2D → result.val[2]
+Vt2.2D → result.val[1]
+Vt.2D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.d - Vt3.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[2] → Vt3.1D
+src.val[1] → Vt2.1D
+src.val[0] → Vt.1D
+0 <= lane <= 0

Results

Vt3.1D → result.val[2]
+Vt2.1D → result.val[1]
+Vt.1D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD3 {Vt.d - Vt3.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[2] → Vt3.2D
+src.val[1] → Vt2.2D
+src.val[0] → Vt.2D
+0 <= lane <= 1

Results

Vt3.2D → result.val[2]
+Vt2.2D → result.val[1]
+Vt.2D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.h - Vt4.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[3] → Vt4.4H
+src.val[2] → Vt3.4H
+src.val[1] → Vt2.4H
+src.val[0] → Vt.4H
+0 <= lane <= 3

Results

Vt4.4H → result.val[3]
+Vt3.4H → result.val[2]
+Vt2.4H → result.val[1]
+Vt.4H → result.val[0]
+
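Illustration only (not part of this patch): a minimal C sketch for the .4H arrangement, assuming the ACLE intrinsic vld4_lane_s16; the element type is an assumption, since the entry gives only the register arrangement.

#include <arm_neon.h>

/* LD4 {Vt.4H - Vt4.4H}[2]: reads four consecutive int16_t values from ptr
 * into lane 2 of src.val[0..3]; the remaining lanes are unchanged. */
int16x4x4_t ld4_lane2_s16(const int16_t *ptr, int16x4x4_t src) {
    return vld4_lane_s16(ptr, src, 2);  /* lane is a constant in 0..3 */
}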

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.h - Vt4.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[3] → Vt4.8H
+src.val[2] → Vt3.8H
+src.val[1] → Vt2.8H
+src.val[0] → Vt.8H
+0 <= lane <= 7

Results

Vt4.8H → result.val[3]
+Vt3.8H → result.val[2]
+Vt2.8H → result.val[1]
+Vt.8H → result.val[0]
+
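For the .8H (quad-register) arrangement, a hedged C sketch assuming vld4q_lane_s16; the element type is an assumption.

#include <arm_neon.h>

/* LD4 {Vt.8H - Vt4.8H}[5]: reads four consecutive int16_t values from ptr
 * into lane 5 of src.val[0..3]; the other seven lanes are unchanged. */
int16x8x4_t ld4q_lane5_s16(const int16_t *ptr, int16x8x4_t src) {
    return vld4q_lane_s16(ptr, src, 5);  /* lane is a constant in 0..7 */
}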

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.s - Vt4.s}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[3] → Vt4.2S
+src.val[2] → Vt3.2S
+src.val[1] → Vt2.2S
+src.val[0] → Vt.2S
+0 <= lane <= 1

Results

Vt4.2S → result.val[3]
+Vt3.2S → result.val[2]
+Vt2.2S → result.val[1]
+Vt.2S → result.val[0]
+
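Illustration only: a minimal C sketch for the .2S arrangement, assuming vld4_lane_f32 (the s32/u32 variants have the same shape); the element type is an assumption.

#include <arm_neon.h>

/* LD4 {Vt.2S - Vt4.2S}[1]: reads four consecutive float values from ptr
 * into lane 1 of src.val[0..3]; lane 0 of each register is unchanged. */
float32x2x4_t ld4_lane1_f32(const float32_t *ptr, float32x2x4_t src) {
    return vld4_lane_f32(ptr, src, 1);  /* lane is a constant in 0..1 */
}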

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.s - Vt4.s}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[3] → Vt4.4S
+src.val[2] → Vt3.4S
+src.val[1] → Vt2.4S
+src.val[0] → Vt.4S
+0 <= lane <= 3

Results

Vt4.4S → result.val[3]
+Vt3.4S → result.val[2]
+Vt2.4S → result.val[1]
+Vt.4S → result.val[0]
+
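For the .4S (quad-register) arrangement, a hedged C sketch assuming vld4q_lane_u32; the element type is an assumption.

#include <arm_neon.h>

/* LD4 {Vt.4S - Vt4.4S}[3]: reads four consecutive uint32_t values from ptr
 * into lane 3 of src.val[0..3]; lanes 0..2 of each register are unchanged. */
uint32x4x4_t ld4q_lane3_u32(const uint32_t *ptr, uint32x4x4_t src) {
    return vld4q_lane_u32(ptr, src, 3);  /* lane is a constant in 0..3 */
}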

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.h - Vt4.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[3] → Vt4.4H
+src.val[2] → Vt3.4H
+src.val[1] → Vt2.4H
+src.val[0] → Vt.4H
+0 <= lane <= 3

Results

Vt4.4H → result.val[3]
+Vt3.4H → result.val[2]
+Vt2.4H → result.val[1]
+Vt.4H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.h - Vt4.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[3] → Vt4.8H
+src.val[2] → Vt3.8H
+src.val[1] → Vt2.8H
+src.val[0] → Vt.8H
+0 <= lane <= 7

Results

Vt4.8H → result.val[3]
+Vt3.8H → result.val[2]
+Vt2.8H → result.val[1]
+Vt.8H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.s - Vt4.s}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[3] → Vt4.2S
+src.val[2] → Vt3.2S
+src.val[1] → Vt2.2S
+src.val[0] → Vt.2S
+0 <= lane <= 1

Results

Vt4.2S → result.val[3]
+Vt3.2S → result.val[2]
+Vt2.2S → result.val[1]
+Vt.2S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.s - Vt4.s}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[3] → Vt4.4S
+src.val[2] → Vt3.4S
+src.val[1] → Vt2.4S
+src.val[0] → Vt.4S
+0 <= lane <= 3

Results

Vt4.4S → result.val[3]
+Vt3.4S → result.val[2]
+Vt2.4S → result.val[1]
+Vt.4S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.h - Vt4.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[3] → Vt4.4H
+src.val[2] → Vt3.4H
+src.val[1] → Vt2.4H
+src.val[0] → Vt.4H
+0 <= lane <= 3

Results

Vt4.4H → result.val[3]
+Vt3.4H → result.val[2]
+Vt2.4H → result.val[1]
+Vt.4H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.h - Vt4.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[3] → Vt4.8H
+src.val[2] → Vt3.8H
+src.val[1] → Vt2.8H
+src.val[0] → Vt.8H
+0 <= lane <= 7

Results

Vt4.8H → result.val[3]
+Vt3.8H → result.val[2]
+Vt2.8H → result.val[1]
+Vt.8H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.s - Vt4.s}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[3] → Vt4.2S
+src.val[2] → Vt3.2S
+src.val[1] → Vt2.2S
+src.val[0] → Vt.2S
+0 <= lane <= 1

Results

Vt4.2S → result.val[3]
+Vt3.2S → result.val[2]
+Vt2.2S → result.val[1]
+Vt.2S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.s - Vt4.s}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[3] → Vt4.4S
+src.val[2] → Vt3.4S
+src.val[1] → Vt2.4S
+src.val[0] → Vt.4S
+0 <= lane <= 3

Results

Vt4.4S → result.val[3]
+Vt3.4S → result.val[2]
+Vt2.4S → result.val[1]
+Vt.4S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.h - Vt4.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[3] → Vt4.4H
+src.val[2] → Vt3.4H
+src.val[1] → Vt2.4H
+src.val[0] → Vt.4H
+0 <= lane <= 3

Results

Vt4.4H → result.val[3]
+Vt3.4H → result.val[2]
+Vt2.4H → result.val[1]
+Vt.4H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.h - Vt4.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[3] → Vt4.8H
+src.val[2] → Vt3.8H
+src.val[1] → Vt2.8H
+src.val[0] → Vt.8H
+0 <= lane <= 7

Results

Vt4.8H → result.val[3]
+Vt3.8H → result.val[2]
+Vt2.8H → result.val[1]
+Vt.8H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.b - Vt4.b}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[3] → Vt4.8B
+src.val[2] → Vt3.8B
+src.val[1] → Vt2.8B
+src.val[0] → Vt.8B
+0 <= lane <= 7

Results

Vt4.8B → result.val[3]
+Vt3.8B → result.val[2]
+Vt2.8B → result.val[1]
+Vt.8B → result.val[0]
+
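Illustration only: a minimal C sketch for the .8B arrangement, assuming vld4_lane_u8; the element type is an assumption.

#include <arm_neon.h>

/* LD4 {Vt.8B - Vt4.8B}[4]: reads four consecutive uint8_t values from ptr
 * into lane 4 of src.val[0..3]; the other seven lanes are unchanged. */
uint8x8x4_t ld4_lane4_u8(const uint8_t *ptr, uint8x8x4_t src) {
    return vld4_lane_u8(ptr, src, 4);  /* lane is a constant in 0..7 */
}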

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.b - Vt4.b}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[3] → Vt4.8B
+src.val[2] → Vt3.8B
+src.val[1] → Vt2.8B
+src.val[0] → Vt.8B
+0 <= lane <= 7

Results

Vt4.8B → result.val[3]
+Vt3.8B → result.val[2]
+Vt2.8B → result.val[1]
+Vt.8B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.b - Vt4.b}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[3] → Vt4.8B
+src.val[2] → Vt3.8B
+src.val[1] → Vt2.8B
+src.val[0] → Vt.8B
+0 <= lane <= 7

Results

Vt4.8B → result.val[3]
+Vt3.8B → result.val[2]
+Vt2.8B → result.val[1]
+Vt.8B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.b - Vt4.b}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[3] → Vt4.16B
+src.val[2] → Vt3.16B
+src.val[1] → Vt2.16B
+src.val[0] → Vt.16B
+0 <= lane <= 15

Results

Vt4.16B → result.val[3]
+Vt3.16B → result.val[2]
+Vt2.16B → result.val[1]
+Vt.16B → result.val[0]
+
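For the .16B (quad-register) arrangement, a hedged C sketch assuming vld4q_lane_u8; the element type is an assumption.

#include <arm_neon.h>

/* LD4 {Vt.16B - Vt4.16B}[9]: reads four consecutive uint8_t values from ptr
 * into lane 9 of src.val[0..3]; the other fifteen lanes are unchanged. */
uint8x16x4_t ld4q_lane9_u8(const uint8_t *ptr, uint8x16x4_t src) {
    return vld4q_lane_u8(ptr, src, 9);  /* lane is a constant in 0..15 */
}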

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.b - Vt4.b}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[3] → Vt4.16B
+src.val[2] → Vt3.16B
+src.val[1] → Vt2.16B
+src.val[0] → Vt.16B
+0 <= lane <= 15

Results

Vt4.16B → result.val[3]
+Vt3.16B → result.val[2]
+Vt2.16B → result.val[1]
+Vt.16B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.b - Vt4.b}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[3] → Vt4.16B
+src.val[2] → Vt3.16B
+src.val[1] → Vt2.16B
+src.val[0] → Vt.16B
+0 <= lane <= 15

Results

Vt4.16B → result.val[3]
+Vt3.16B → result.val[2]
+Vt2.16B → result.val[1]
+Vt.16B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.d - Vt4.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[3] → Vt4.1D
+src.val[2] → Vt3.1D
+src.val[1] → Vt2.1D
+src.val[0] → Vt.1D
+0 <= lane <= 0

Results

Vt4.1D → result.val[3]
+Vt3.1D → result.val[2]
+Vt2.1D → result.val[1]
+Vt.1D → result.val[0]
+
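Illustration only: a minimal C sketch for the .1D arrangement, assuming vld4_lane_u64; the element type is an assumption.

#include <arm_neon.h>

/* LD4 {Vt.1D - Vt4.1D}[0]: reads four consecutive uint64_t values from ptr
 * into lane 0 of src.val[0..3]. */
uint64x1x4_t ld4_lane0_u64(const uint64_t *ptr, uint64x1x4_t src) {
    return vld4_lane_u64(ptr, src, 0);  /* lane must be the constant 0 */
}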

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.d - Vt4.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[3] → Vt4.2D
+src.val[2] → Vt3.2D
+src.val[1] → Vt2.2D
+src.val[0] → Vt.2D
+0 <= lane <= 1

Results

Vt4.2D → result.val[3]
+Vt3.2D → result.val[2]
+Vt2.2D → result.val[1]
+Vt.2D → result.val[0]
+
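And for the .2D (quad-register) arrangement, a hedged C sketch assuming vld4q_lane_u64; the element type is an assumption.

#include <arm_neon.h>

/* LD4 {Vt.2D - Vt4.2D}[1]: reads four consecutive uint64_t values from ptr
 * into lane 1 of src.val[0..3]; lane 0 of each register is unchanged. */
uint64x2x4_t ld4q_lane1_u64(const uint64_t *ptr, uint64x2x4_t src) {
    return vld4q_lane_u64(ptr, src, 1);  /* lane is a constant in 0..1 */
}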

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.d - Vt4.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[3] → Vt4.1D
+src.val[2] → Vt3.1D
+src.val[1] → Vt2.1D
+src.val[0] → Vt.1D
+0 <= lane <= 0

Results

Vt4.1D → result.val[3]
+Vt3.1D → result.val[2]
+Vt2.1D → result.val[1]
+Vt.1D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.d - Vt4.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[3] → Vt4.2D
+src.val[2] → Vt3.2D
+src.val[1] → Vt2.2D
+src.val[0] → Vt.2D
+0 <= lane <= 1

Results

Vt4.2D → result.val[3]
+Vt3.2D → result.val[2]
+Vt2.2D → result.val[1]
+Vt.2D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.d - Vt4.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[3] → Vt4.1D
+src.val[2] → Vt3.1D
+src.val[1] → Vt2.1D
+src.val[0] → Vt.1D
+0 <= lane <= 0

Results

Vt4.1D → result.val[3]
+Vt3.1D → result.val[2]
+Vt2.1D → result.val[1]
+Vt.1D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.d - Vt4.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[3] → Vt4.2D
+src.val[2] → Vt3.2D
+src.val[1] → Vt2.2D
+src.val[0] → Vt.2D
+0 <= lane <= 1

Results

Vt4.2D → result.val[3]
+Vt3.2D → result.val[2]
+Vt2.2D → result.val[1]
+Vt.2D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.d - Vt4.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[3] → Vt4.1D
+src.val[2] → Vt3.1D
+src.val[1] → Vt2.1D
+src.val[0] → Vt.1D
+0 <= lane <= 0

Results

Vt4.1D → result.val[3]
+Vt3.1D → result.val[2]
+Vt2.1D → result.val[1]
+Vt.1D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

+

A64 Instruction

LD4 {Vt.d - Vt4.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+src.val[3] → Vt4.2D
+src.val[2] → Vt3.2D
+src.val[1] → Vt2.2D
+src.val[0] → Vt.2D
+0 <= lane <= 1

Results

Vt4.2D → result.val[3]
+Vt3.2D → result.val[2]
+Vt2.2D → result.val[1]
+Vt.2D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.b - Vt2.b}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.8B
+val.val[0] → Vt.8B
+0 <= lane <= 7

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+
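
A minimal C sketch, assuming this entry corresponds to the ACLE intrinsic vst2_lane_u8 from arm_neon.h:

#include <arm_neon.h>

/* Store the byte pair held in lane 3 of the two .8B registers in val;
 * exactly 2 bytes are written to ptr (the MemOp_STORE branch above). */
void store_pair_from_lane3(uint8_t *ptr, uint8x8x2_t val)
{
    vst2_lane_u8(ptr, val, 3); /* 0 <= lane <= 7 */
}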

Supported architectures

v7/A32/A64

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.b - Vt2.b}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.8B
+val.val[0] → Vt.8B
+0 <= lane <= 7

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.b - Vt2.b}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.8B
+val.val[0] → Vt.8B
+0 <= lane <= 7

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.b - Vt3.b}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.8B
+val.val[1] → Vt2.8B
+val.val[0] → Vt.8B
+0 <= lane <= 7

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+
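
A minimal C sketch, assuming the ACLE intrinsic vst3_lane_u8 from arm_neon.h:

#include <arm_neon.h>

/* Store the 3-byte structure held in lane 5 of the three .8B registers
 * in val; exactly 3 bytes are written to ptr. */
void store_triple_from_lane5(uint8_t *ptr, uint8x8x3_t val)
{
    vst3_lane_u8(ptr, val, 5); /* 0 <= lane <= 7 */
}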

Supported architectures

v7/A32/A64

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.b - Vt3.b}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.8B
+val.val[1] → Vt2.8B
+val.val[0] → Vt.8B
+0 <= lane <= 7

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.b - Vt3.b}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.8B
+val.val[1] → Vt2.8B
+val.val[0] → Vt.8B
+0 <= lane <= 7

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.b - Vt4.b}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.8B
+val.val[2] → Vt3.8B
+val.val[1] → Vt2.8B
+val.val[0] → Vt.8B
+0 <= lane <= 7

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+
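
A minimal C sketch, assuming the ACLE intrinsic vst4_lane_u8 from arm_neon.h:

#include <arm_neon.h>

/* Store the 4-byte structure held in lane 2 of the four .8B registers
 * in val; exactly 4 bytes are written to ptr. */
void store_quad_from_lane2(uint8_t *ptr, uint8x8x4_t val)
{
    vst4_lane_u8(ptr, val, 2); /* 0 <= lane <= 7 */
}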

Supported architectures

v7/A32/A64

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.b - Vt4.b}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.8B
+val.val[2] → Vt3.8B
+val.val[1] → Vt2.8B
+val.val[0] → Vt.8B
+0 <= lane <= 7

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.b - Vt4.b}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.8B
+val.val[2] → Vt3.8B
+val.val[1] → Vt2.8B
+val.val[0] → Vt.8B
+0 <= lane <= 7

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.h - Vt2.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.4H
+val.val[0] → Vt.4H
+0 <= lane <= 3

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+
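
A minimal C sketch, assuming the ACLE intrinsic vst2_lane_u16 from arm_neon.h:

#include <arm_neon.h>

/* Store the halfword pair held in lane 2 of the two .4H registers in
 * val; exactly 4 bytes (two 16-bit elements) are written to ptr. */
void store_pair_from_lane2(uint16_t *ptr, uint16x4x2_t val)
{
    vst2_lane_u16(ptr, val, 2); /* 0 <= lane <= 3 */
}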

Supported architectures

v7/A32/A64

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.h - Vt2.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.8H
+val.val[0] → Vt.8H
+0 <= lane <= 7

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.s - Vt2.s}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.2S
+val.val[0] → Vt.2S
+0 <= lane <= 1

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+
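
A minimal C sketch, assuming the ACLE intrinsic vst2_lane_u32 from arm_neon.h:

#include <arm_neon.h>

/* Store the word pair held in lane 1 of the two .2S registers in val;
 * exactly 8 bytes (two 32-bit elements) are written to ptr. */
void store_pair_from_lane1(uint32_t *ptr, uint32x2x2_t val)
{
    vst2_lane_u32(ptr, val, 1); /* 0 <= lane <= 1 */
}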

Supported architectures

v7/A32/A64

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.s - Vt2.s}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.4S
+val.val[0] → Vt.4S
+0 <= lane <= 3

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.h - Vt2.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.4H
+val.val[0] → Vt.4H
+0 <= lane <= 3

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.h - Vt2.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.8H
+val.val[0] → Vt.8H
+0 <= lane <= 7

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.s - Vt2.s}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.2S
+val.val[0] → Vt.2S
+0 <= lane <= 1

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.s - Vt2.s}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.4S
+val.val[0] → Vt.4S
+0 <= lane <= 3

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.h - Vt2.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.4H
+val.val[0] → Vt.4H
+0 <= lane <= 3

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.h - Vt2.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.8H
+val.val[0] → Vt.8H
+0 <= lane <= 7

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.s - Vt2.s}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.2S
+val.val[0] → Vt.2S
+0 <= lane <= 1

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.s - Vt2.s}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.4S
+val.val[0] → Vt.4S
+0 <= lane <= 3

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.h - Vt2.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.4H
+val.val[0] → Vt.4H
+0 <= lane <= 3

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.h - Vt2.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.8H
+val.val[0] → Vt.8H
+0 <= lane <= 7

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.b - Vt2.b}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.16B
+val.val[0] → Vt.16B
+0 <= lane <= 15

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+
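
A minimal C sketch for the 128-bit register form, assuming the A64-only ACLE intrinsic vst2q_lane_u8 from arm_neon.h:

#include <arm_neon.h>

/* Store the byte pair held in lane 9 of the two .16B registers in val;
 * exactly 2 bytes are written to ptr. A64 only. */
void store_pair_from_lane9(uint8_t *ptr, uint8x16x2_t val)
{
    vst2q_lane_u8(ptr, val, 9); /* 0 <= lane <= 15 */
}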

Supported architectures

A64

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.b - Vt2.b}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.16B
+val.val[0] → Vt.16B
+0 <= lane <= 15

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.b - Vt2.b}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.16B
+val.val[0] → Vt.16B
+0 <= lane <= 15

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.d - Vt2.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.1D
+val.val[0] → Vt.1D
+0 <= lane <= 0

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+
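
A minimal C sketch, assuming the A64-only ACLE intrinsic vst2_lane_u64 from arm_neon.h; with .1D registers the only valid lane is 0:

#include <arm_neon.h>

/* Store the doubleword pair held in lane 0 of the two .1D registers in
 * val; exactly 16 bytes are written to ptr. A64 only. */
void store_pair_from_lane0(uint64_t *ptr, uint64x1x2_t val)
{
    vst2_lane_u64(ptr, val, 0); /* lane must be 0 */
}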

Supported architectures

A64

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.d - Vt2.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.2D
+val.val[0] → Vt.2D
+0 <= lane <= 1

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+
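
And the 128-bit register counterpart, assuming the A64-only ACLE intrinsic vst2q_lane_u64 from arm_neon.h:

#include <arm_neon.h>

/* Store the doubleword pair held in lane 1 of the two .2D registers in
 * val; exactly 16 bytes are written to ptr. A64 only. */
void store_qpair_from_lane1(uint64_t *ptr, uint64x2x2_t val)
{
    vst2q_lane_u64(ptr, val, 1); /* 0 <= lane <= 1 */
}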

Supported architectures

A64

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.d - Vt2.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.1D
+val.val[0] → Vt.1D
+0 <= lane <= 0

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64
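
The extract above does not show which C intrinsic this entry belongs to; judging by the Argument Preparation block (two .2D registers, 0 <= lane <= 1) it is one of the 64-bit ST2 lane-store intrinsics. A minimal, hedged C sketch assuming the AArch64-only ACLE intrinsic vst2q_lane_f64 (the function and buffer names below are illustrative, not from the source):

#include <arm_neon.h>

/* Store lane 1 of a de-interleaved pair as the 2-element structure
 * {v.val[0][1], v.val[1][1]} at out[0..1]. AArch64 only. */
void store_pair_lane1(float64_t out[2]) {
    float64x2x2_t v;
    v.val[0] = vdupq_n_f64(1.0);  /* maps to Vt.2D  */
    v.val[1] = vdupq_n_f64(2.0);  /* maps to Vt2.2D */
    vst2q_lane_f64(out, v, 1);    /* lane must be a constant in 0..1 */
}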

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.d - Vt2.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.2D
+val.val[0] → Vt.2D
+0 <= lane <= 1

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.d - Vt2.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.1D
+val.val[0] → Vt.1D
+0 <= lane <= 0

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.d - Vt2.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.2D
+val.val[0] → Vt.2D
+0 <= lane <= 1

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.d - Vt2.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.1D
+val.val[0] → Vt.1D
+0 <= lane <= 0

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

+

A64 Instruction

ST2 {Vt.d - Vt2.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.2D
+val.val[0] → Vt.2D
+0 <= lane <= 1

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.h - Vt3.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.4H
+val.val[1] → Vt2.4H
+val.val[0] → Vt.4H
+0 <= lane <= 3

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
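
The intrinsic name is omitted from this extract; the .4H arrangement with 0 <= lane <= 3 matches the 16-bit ST3 lane-store intrinsics. A sketch assuming the ACLE intrinsic vst3_lane_s16 (function and buffer names below are illustrative):

#include <arm_neon.h>

/* Store the 3-element structure taken from lane 2 of three 4x16-bit
 * vectors to out[0..2] (6 bytes in total). */
void store_triple_lane2(int16_t out[3]) {
    int16x4x3_t v;
    v.val[0] = vdup_n_s16(10);  /* Vt.4H  */
    v.val[1] = vdup_n_s16(20);  /* Vt2.4H */
    v.val[2] = vdup_n_s16(30);  /* Vt3.4H */
    vst3_lane_s16(out, v, 2);   /* lane must be a constant in 0..3 */
}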

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.h - Vt3.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.8H
+val.val[1] → Vt2.8H
+val.val[0] → Vt.8H
+0 <= lane <= 7

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.s - Vt3.s}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.2S
+val.val[1] → Vt2.2S
+val.val[0] → Vt.2S
+0 <= lane <= 1

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.s - Vt3.s}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.4S
+val.val[1] → Vt2.4S
+val.val[0] → Vt.4S
+0 <= lane <= 3

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
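
For the .4S form (0 <= lane <= 3) a plausible counterpart is vst3q_lane_f32; the sketch below only illustrates the val[i] → Vt/Vt2/Vt3 mapping described above and uses made-up names:

#include <arm_neon.h>

/* Write {a[lane], b[lane], c[lane]} contiguously at out. */
void store_rgb_lane3(float32_t out[3], float32x4_t a, float32x4_t b, float32x4_t c) {
    float32x4x3_t v = { { a, b, c } };  /* val[0]=Vt.4S, val[1]=Vt2.4S, val[2]=Vt3.4S */
    vst3q_lane_f32(out, v, 3);          /* lane must be a constant in 0..3 */
}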

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.h - Vt3.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.4H
+val.val[1] → Vt2.4H
+val.val[0] → Vt.4H
+0 <= lane <= 3

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.h - Vt3.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.8H
+val.val[1] → Vt2.8H
+val.val[0] → Vt.8H
+0 <= lane <= 7

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.s - Vt3.s}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.2S
+val.val[1] → Vt2.2S
+val.val[0] → Vt.2S
+0 <= lane <= 1

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.s - Vt3.s}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.4S
+val.val[1] → Vt2.4S
+val.val[0] → Vt.4S
+0 <= lane <= 3

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.h - Vt3.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.4H
+val.val[1] → Vt2.4H
+val.val[0] → Vt.4H
+0 <= lane <= 3

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.h - Vt3.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.8H
+val.val[1] → Vt2.8H
+val.val[0] → Vt.8H
+0 <= lane <= 7

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.s - Vt3.s}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.2S
+val.val[1] → Vt2.2S
+val.val[0] → Vt.2S
+0 <= lane <= 1

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.s - Vt3.s}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.4S
+val.val[1] → Vt2.4S
+val.val[0] → Vt.4S
+0 <= lane <= 3

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.h - Vt3.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.4H
+val.val[1] → Vt2.4H
+val.val[0] → Vt.4H
+0 <= lane <= 3

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.h - Vt3.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.8H
+val.val[1] → Vt2.8H
+val.val[0] → Vt.8H
+0 <= lane <= 7

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.b - Vt3.b}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.16B
+val.val[1] → Vt2.16B
+val.val[0] → Vt.16B
+0 <= lane <= 15

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
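
The .16B form allows any of the 16 byte lanes (0 <= lane <= 15). Assuming the entry corresponds to a q-form 8-bit intrinsic such as vst3q_lane_s8 (an assumption, since the name is not shown in the extract), a short sketch:

#include <arm_neon.h>

/* Byte variant: 16 lanes per register. Stores the three bytes
 * {v.val[0][15], v.val[1][15], v.val[2][15]} at out[0..2]. */
void store_triple_byte_lane15(int8_t out[3], int8x16x3_t v) {
    vst3q_lane_s8(out, v, 15);  /* lane must be a constant in 0..15 */
}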

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.b - Vt3.b}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.16B
+val.val[1] → Vt2.16B
+val.val[0] → Vt.16B
+0 <= lane <= 15

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.b - Vt3.b}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.16B
+val.val[1] → Vt2.16B
+val.val[0] → Vt.16B
+0 <= lane <= 15

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.d - Vt3.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.1D
+val.val[1] → Vt2.1D
+val.val[0] → Vt.1D
+0 <= lane <= 0

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64
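
The .1D form has a single lane, so the only valid lane index is 0, and the entry above is listed as A64-only. A sketch assuming the intrinsic is vst3_lane_f64 (illustrative names, not from the source):

#include <arm_neon.h>

/* AArch64 only: store {a[0], b[0], c[0]} as a 3-element structure at out. */
void store_triple_d(float64_t out[3], float64x1_t a, float64x1_t b, float64x1_t c) {
    float64x1x3_t v = { { a, b, c } };
    vst3_lane_f64(out, v, 0);  /* 0 is the only permitted lane for .1D */
}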

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.d - Vt3.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.2D
+val.val[1] → Vt2.2D
+val.val[0] → Vt.2D
+0 <= lane <= 1

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.d - Vt3.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.1D
+val.val[1] → Vt2.1D
+val.val[0] → Vt.1D
+0 <= lane <= 0

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.d - Vt3.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.2D
+val.val[1] → Vt2.2D
+val.val[0] → Vt.2D
+0 <= lane <= 1

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.d - Vt3.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.1D
+val.val[1] → Vt2.1D
+val.val[0] → Vt.1D
+0 <= lane <= 0

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.d - Vt3.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.2D
+val.val[1] → Vt2.2D
+val.val[0] → Vt.2D
+0 <= lane <= 1

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.d - Vt3.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.1D
+val.val[1] → Vt2.1D
+val.val[0] → Vt.1D
+0 <= lane <= 0

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

+

A64 Instruction

ST3 {Vt.d - Vt3.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.2D
+val.val[1] → Vt2.2D
+val.val[0] → Vt.2D
+0 <= lane <= 1

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.h - Vt4.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.4H
+val.val[2] → Vt3.4H
+val.val[1] → Vt2.4H
+val.val[0] → Vt.4H
+0 <= lane <= 3

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
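
The ST4 lane-store entries follow the same pattern with a fourth register. Assuming the .4H entry above corresponds to vst4_lane_s16 (names below are illustrative):

#include <arm_neon.h>

/* Store the 4-element structure taken from lane 0 of four 4x16-bit
 * vectors to out[0..3] (8 bytes in total). */
void store_quad_lane0(int16_t out[4]) {
    int16x4x4_t v;
    v.val[0] = vdup_n_s16(1);  /* Vt.4H  */
    v.val[1] = vdup_n_s16(2);  /* Vt2.4H */
    v.val[2] = vdup_n_s16(3);  /* Vt3.4H */
    v.val[3] = vdup_n_s16(4);  /* Vt4.4H */
    vst4_lane_s16(out, v, 0);  /* lane must be a constant in 0..3 */
}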

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.h - Vt4.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.8H
+val.val[2] → Vt3.8H
+val.val[1] → Vt2.8H
+val.val[0] → Vt.8H
+0 <= lane <= 7

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.s - Vt4.s}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.2S
+val.val[2] → Vt3.2S
+val.val[1] → Vt2.2S
+val.val[0] → Vt.2S
+0 <= lane <= 1

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
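
As with the 16-bit form, this 32-bit single-lane entry does not name the C intrinsic it documents; assuming it maps to a 32-bit vst4_lane intrinsic such as vst4_lane_f32 (chosen only for illustration), the call looks like:

#include <arm_neon.h>

void store_lane0_of_f32x2x4(float32_t *ptr, float32x2x4_t val)
{
    /* val.val[0..3] -> Vt..Vt4.2S; with 2S vectors the lane must be 0 or 1 */
    vst4_lane_f32(ptr, val, 0);   /* corresponds to ST4 {Vt.s - Vt4.s}[0],[Xn] above */
}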

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.s - Vt4.s}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.4S
+val.val[2] → Vt3.4S
+val.val[1] → Vt2.4S
+val.val[0] → Vt.4S
+0 <= lane <= 3

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.h - Vt4.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.4H
+val.val[2] → Vt3.4H
+val.val[1] → Vt2.4H
+val.val[0] → Vt.4H
+0 <= lane <= 3

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.h - Vt4.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.8H
+val.val[2] → Vt3.8H
+val.val[1] → Vt2.8H
+val.val[0] → Vt.8H
+0 <= lane <= 7

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.s - Vt4.s}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.2S
+val.val[2] → Vt3.2S
+val.val[1] → Vt2.2S
+val.val[0] → Vt.2S
+0 <= lane <= 1

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.s - Vt4.s}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.4S
+val.val[2] → Vt3.4S
+val.val[1] → Vt2.4S
+val.val[0] → Vt.4S
+0 <= lane <= 3

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.h - Vt4.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.4H
+val.val[2] → Vt3.4H
+val.val[1] → Vt2.4H
+val.val[0] → Vt.4H
+0 <= lane <= 3

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.h - Vt4.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.8H
+val.val[2] → Vt3.8H
+val.val[1] → Vt2.8H
+val.val[0] → Vt.8H
+0 <= lane <= 7

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.s - Vt4.s}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.2S
+val.val[2] → Vt3.2S
+val.val[1] → Vt2.2S
+val.val[0] → Vt.2S
+0 <= lane <= 1

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.s - Vt4.s}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.4S
+val.val[2] → Vt3.4S
+val.val[1] → Vt2.4S
+val.val[0] → Vt.4S
+0 <= lane <= 3

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.h - Vt4.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.4H
+val.val[2] → Vt3.4H
+val.val[1] → Vt2.4H
+val.val[0] → Vt.4H
+0 <= lane <= 3

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.h - Vt4.h}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.8H
+val.val[2] → Vt3.8H
+val.val[1] → Vt2.8H
+val.val[0] → Vt.8H
+0 <= lane <= 7

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.b - Vt4.b}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.16B
+val.val[2] → Vt3.16B
+val.val[1] → Vt2.16B
+val.val[0] → Vt.16B
+0 <= lane <= 15

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64
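
This byte-lane form is listed as A64 only. Assuming it corresponds to a 128-bit byte-element vst4q_lane intrinsic such as vst4q_lane_s8 (a representative choice, not named in the entry), a usage sketch:

#include <arm_neon.h>

void store_lane5_of_s8x16x4(int8_t *ptr, int8x16x4_t val)
{
    /* val.val[0..3] -> Vt..Vt4.16B; the lane argument must be 0..15 */
    vst4q_lane_s8(ptr, val, 5);   /* corresponds to ST4 {Vt.b - Vt4.b}[5],[Xn] above */
}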

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.b - Vt4.b}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.16B
+val.val[2] → Vt3.16B
+val.val[1] → Vt2.16B
+val.val[0] → Vt.16B
+0 <= lane <= 15

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.b - Vt4.b}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.16B
+val.val[2] → Vt3.16B
+val.val[1] → Vt2.16B
+val.val[0] → Vt.16B
+0 <= lane <= 15

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.d - Vt4.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.1D
+val.val[2] → Vt3.1D
+val.val[1] → Vt2.1D
+val.val[0] → Vt.1D
+0 <= lane <= 0

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64
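
For the 64-bit element form the only legal lane index is 0. Assuming the entry corresponds to a 64-bit vst4_lane intrinsic such as vst4_lane_s64 (again a representative name, not given in the entry), a usage sketch:

#include <arm_neon.h>

void store_s64x1x4(int64_t *ptr, int64x1x4_t val)
{
    /* val.val[0..3] -> Vt..Vt4.1D; with 1D vectors the lane must be 0 */
    vst4_lane_s64(ptr, val, 0);   /* corresponds to ST4 {Vt.d - Vt4.d}[0],[Xn] above */
}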

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.d - Vt4.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.2D
+val.val[2] → Vt3.2D
+val.val[1] → Vt2.2D
+val.val[0] → Vt.2D
+0 <= lane <= 1

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.d - Vt4.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.1D
+val.val[2] → Vt3.1D
+val.val[1] → Vt2.1D
+val.val[0] → Vt.1D
+0 <= lane <= 0

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.d - Vt4.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.2D
+val.val[2] → Vt3.2D
+val.val[1] → Vt2.2D
+val.val[0] → Vt.2D
+0 <= lane <= 1

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.d - Vt4.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.1D
+val.val[2] → Vt3.1D
+val.val[1] → Vt2.1D
+val.val[0] → Vt.1D
+0 <= lane <= 0

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.d - Vt4.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.2D
+val.val[2] → Vt3.2D
+val.val[1] → Vt2.2D
+val.val[0] → Vt.2D
+0 <= lane <= 1

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.d - Vt4.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.1D
+val.val[2] → Vt3.1D
+val.val[1] → Vt2.1D
+val.val[0] → Vt.1D
+0 <= lane <= 0

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

+

A64 Instruction

ST4 {Vt.d - Vt4.d}[lane],[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.2D
+val.val[2] → Vt3.2D
+val.val[1] → Vt2.2D
+val.val[0] → Vt.2D
+0 <= lane <= 1

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Store multiple single-element structures from two registers. This instruction stores elements to memory from two SIMD&FP registers, without interleaving.

+

A64 Instruction

ST1 {Vt.8B - Vt2.8B},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.8B
+val.val[0] → Vt.8B

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
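
Unlike the ST4 lane entries above, this form stores every element of two whole registers. Assuming it corresponds to an x2 variant of vst1 such as vst1_u8_x2 from <arm_neon.h> (a representative choice; the entry does not name the intrinsic), a usage sketch:

#include <arm_neon.h>

void store_u8x8x2(uint8_t *ptr, uint8x8x2_t val)
{
    /* val.val[0] -> Vt.8B, val.val[1] -> Vt2.8B; 16 bytes are written in total */
    vst1_u8_x2(ptr, val);         /* corresponds to ST1 {Vt.8B - Vt2.8B},[Xn] above */
}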

Description

Store multiple single-element structures from two registers. This instruction stores elements to memory from two SIMD&FP registers, without interleaving.

+

A64 Instruction

ST1 {Vt.16B - Vt2.16B},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.16B
+val.val[0] → Vt.16B

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store multiple single-element structures from two registers. This instruction stores elements to memory from two SIMD&FP registers, without interleaving.

+

A64 Instruction

ST1 {Vt.4H - Vt2.4H},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.4H
+val.val[0] → Vt.4H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store multiple single-element structures from two registers. This instruction stores elements to memory from two SIMD&FP registers, without interleaving.

+

A64 Instruction

ST1 {Vt.8H - Vt2.8H},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.8H
+val.val[0] → Vt.8H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store multiple single-element structures from two registers. This instruction stores elements to memory from two SIMD&FP registers, without interleaving.

+

A64 Instruction

ST1 {Vt.2S - Vt2.2S},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.2S
+val.val[0] → Vt.2S

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store multiple single-element structures from two registers. This instruction stores elements to memory from two SIMD&FP registers, without interleaving.

+

A64 Instruction

ST1 {Vt.4S - Vt2.4S},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.4S
+val.val[0] → Vt.4S

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store multiple single-element structures from two registers. This instruction stores elements to memory from two SIMD&FP registers, without interleaving.

+

A64 Instruction

ST1 {Vt.8B - Vt2.8B},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.8B
+val.val[0] → Vt.8B

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store multiple single-element structures from two registers. This instruction stores elements to memory from two SIMD&FP registers, without interleaving.

+

A64 Instruction

ST1 {Vt.16B - Vt2.16B},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.16B
+val.val[0] → Vt.16B

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store multiple single-element structures from two registers. This instruction stores elements to memory from two SIMD&FP registers, without interleaving.

+

A64 Instruction

ST1 {Vt.4H - Vt2.4H},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.4H
+val.val[0] → Vt.4H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store multiple single-element structures from two registers. This instruction stores elements to memory from two SIMD&FP registers, without interleaving.

+

A64 Instruction

ST1 {Vt.8H - Vt2.8H},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.8H
+val.val[0] → Vt.8H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.2S - Vt2.2S},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.2S
+val.val[0] → Vt.2S

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
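
For the 2S arrangement the same pattern holds with 32-bit lanes. A hedged sketch, assuming the single-precision float variant vst1_f32_x2 (the signed and unsigned 32-bit integer variants would look identical apart from the types):

#include <arm_neon.h>

/* Store two 2-lane single-precision vectors: 4 floats (16 bytes) at ptr. */
void store_f32x2_pair(float32_t *ptr /* 4 writable floats */) {
    float32x2x2_t val;
    val.val[0] = vdup_n_f32(1.0f); /* -> Vt.2S  */
    val.val[1] = vdup_n_f32(2.0f); /* -> Vt2.2S */
    vst1_f32_x2(ptr, val);         /* ST1 {Vt.2S - Vt2.2S}, [Xn] */
}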

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.4S - Vt2.4S},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.4S
+val.val[0] → Vt.4S

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.4H - Vt2.4H},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.4H
+val.val[0] → Vt.4H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.8H - Vt2.8H},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.8H
+val.val[0] → Vt.8H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.2S - Vt2.2S},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.2S
+val.val[0] → Vt.2S

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.4S - Vt2.4S},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.4S
+val.val[0] → Vt.4S

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.8B - Vt2.8B},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.8B
+val.val[0] → Vt.8B

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
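
A common use of the two-register form is a straight 16-byte block copy: load a pair with the matching vld1_*_x2 intrinsic and store it again. This sketch assumes the unsigned-byte variants vld1_u8_x2/vst1_u8_x2; the element type of this particular entry is not shown in the excerpt:

#include <arm_neon.h>
#include <stdint.h>

/* Copy 16 bytes using one two-register load and one two-register store. */
void copy16(uint8_t *dst, const uint8_t *src) {
    uint8x8x2_t val = vld1_u8_x2(src); /* LD1 {Vt.8B - Vt2.8B}, [Xn] */
    vst1_u8_x2(dst, val);              /* ST1 {Vt.8B - Vt2.8B}, [Xn] */
}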

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.16B - Vt2.16B},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.16B
+val.val[0] → Vt.16B

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.4H - Vt2.4H},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.4H
+val.val[0] → Vt.4H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.8H - Vt2.8H},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.8H
+val.val[0] → Vt.8H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.1D - Vt2.1D},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.1D
+val.val[0] → Vt.1D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
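
The 1D arrangement holds one 64-bit element per D register, so the pair covers 16 bytes. A sketch assuming the unsigned 64-bit variant vst1_u64_x2 (the entry could equally be the signed variant):

#include <arm_neon.h>
#include <stdint.h>

/* Store two 1x64-bit vectors to 16 contiguous bytes at ptr. */
void store_u64x1_pair(uint64_t *ptr /* 2 writable uint64_t */) {
    uint64x1x2_t val;
    val.val[0] = vdup_n_u64(0xAAAAAAAAAAAAAAAAull); /* -> Vt.1D  */
    val.val[1] = vdup_n_u64(0x5555555555555555ull); /* -> Vt2.1D */
    vst1_u64_x2(ptr, val);       /* ST1 {Vt.1D - Vt2.1D}, [Xn] */
}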

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.1D - Vt2.1D},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.1D
+val.val[0] → Vt.1D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.1D - Vt2.1D},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.1D
+val.val[0] → Vt.1D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A32/A64
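
The A32/A64-only architecture row suggests, although this excerpt does not say so, that this 1D entry is the poly64 variant, which is not available on Armv7. A sketch under that assumption, using vst1_p64_x2 (the poly64 types are normally only available when the Armv8 crypto extensions are enabled):

#include <arm_neon.h>

/* Store a pair of 64-bit polynomial vectors to 16 contiguous bytes. */
void store_p64x1_pair(poly64_t *ptr /* 2 writable poly64_t */, poly64x1x2_t val) {
    vst1_p64_x2(ptr, val); /* ST1 {Vt.1D - Vt2.1D}, [Xn] */
}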

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.2D - Vt2.2D},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.2D
+val.val[0] → Vt.2D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
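
With the 2D arrangement each Q register holds two 64-bit lanes, so the pair stores 32 bytes. A sketch assuming the unsigned 64-bit variant vst1q_u64_x2:

#include <arm_neon.h>
#include <stdint.h>

/* Store two 2x64-bit Q registers: 32 contiguous bytes at ptr. */
void store_u64x2_pair(uint64_t *ptr /* 4 writable uint64_t */,
                      uint64x2_t lo, uint64x2_t hi) {
    uint64x2x2_t val;
    val.val[0] = lo;        /* -> Vt.2D  */
    val.val[1] = hi;        /* -> Vt2.2D */
    vst1q_u64_x2(ptr, val); /* ST1 {Vt.2D - Vt2.2D}, [Xn] */
}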

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.2D - Vt2.2D},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.2D
+val.val[0] → Vt.2D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.2D - Vt2.2D},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.2D
+val.val[0] → Vt.2D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.1D - Vt2.1D},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.1D
+val.val[0] → Vt.1D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64
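
An A64-only 1D pair store is most plausibly the double-precision float variant, though this excerpt does not name it. A sketch assuming vst1_f64_x2, which exists only in the AArch64 form of ACLE:

#include <arm_neon.h>

/* Store two 1x double-precision vectors: 2 doubles (16 bytes) at ptr. */
void store_f64x1_pair(float64_t *ptr /* 2 writable doubles */) {
    float64x1x2_t val;
    val.val[0] = vdup_n_f64(1.0); /* -> Vt.1D  */
    val.val[1] = vdup_n_f64(2.0); /* -> Vt2.1D */
    vst1_f64_x2(ptr, val);        /* ST1 {Vt.1D - Vt2.1D}, [Xn] */
}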

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.2D - Vt2.2D},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[1] → Vt2.2D
+val.val[0] → Vt.2D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64
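
Likewise, the A64-only 2D pair is presumably the float64x2 variant. A sketch assuming vst1q_f64_x2:

#include <arm_neon.h>

/* Store two 2x double-precision Q registers: 4 doubles (32 bytes) at ptr. */
void store_f64x2_pair(float64_t *ptr /* 4 writable doubles */,
                      float64x2_t a, float64x2_t b) {
    float64x2x2_t val;
    val.val[0] = a;         /* -> Vt.2D  */
    val.val[1] = b;         /* -> Vt2.2D */
    vst1q_f64_x2(ptr, val); /* ST1 {Vt.2D - Vt2.2D}, [Xn] */
}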

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.8B - Vt3.8B},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.8B
+val.val[1] → Vt2.8B
+val.val[0] → Vt.8B

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
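
From here the entries move to the three-register form, ST1 {Vt - Vt3}, whose argument structure carries a third val.val[2] member. Unlike ST3, the three registers are stored back to back without interleaving. A sketch assuming the unsigned-byte variant vst1_u8_x3:

#include <arm_neon.h>
#include <stdint.h>

/* Store three 8-byte D registers: 24 contiguous bytes at ptr. */
void store_u8x8_triple(uint8_t *ptr /* 24 writable bytes */) {
    uint8x8x3_t val;
    val.val[0] = vdup_n_u8(0x11); /* -> Vt.8B  */
    val.val[1] = vdup_n_u8(0x22); /* -> Vt2.8B */
    val.val[2] = vdup_n_u8(0x33); /* -> Vt3.8B */
    vst1_u8_x3(ptr, val);         /* ST1 {Vt.8B - Vt3.8B}, [Xn] */
}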

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.16B - Vt3.16B},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.16B
+val.val[1] → Vt2.16B
+val.val[0] → Vt.16B

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
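
The Q-register triple covers 48 bytes, which makes a convenient block copy when paired with the matching load. A sketch assuming vld1q_u8_x3 and vst1q_u8_x3:

#include <arm_neon.h>
#include <stdint.h>

/* Copy 48 bytes with one three-register load and one three-register store. */
void copy48(uint8_t *dst, const uint8_t *src) {
    uint8x16x3_t val = vld1q_u8_x3(src); /* LD1 {Vt.16B - Vt3.16B}, [Xn] */
    vst1q_u8_x3(dst, val);               /* ST1 {Vt.16B - Vt3.16B}, [Xn] */
}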

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.4H - Vt3.4H},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.4H
+val.val[1] → Vt2.4H
+val.val[0] → Vt.4H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.8H - Vt3.8H},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.8H
+val.val[1] → Vt2.8H
+val.val[0] → Vt.8H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.2S - Vt3.2S},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.2S
+val.val[1] → Vt2.2S
+val.val[0] → Vt.2S

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
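
The 2S triple stores six 32-bit lanes (24 bytes). A sketch assuming the unsigned 32-bit variant vst1_u32_x3; the signed and float variants differ only in the types:

#include <arm_neon.h>
#include <stdint.h>

/* Store three 2x32-bit D registers: 24 contiguous bytes at ptr. */
void store_u32x2_triple(uint32_t *ptr /* 6 writable uint32_t */) {
    uint32x2x3_t val;
    val.val[0] = vdup_n_u32(1); /* -> Vt.2S  */
    val.val[1] = vdup_n_u32(2); /* -> Vt2.2S */
    val.val[2] = vdup_n_u32(3); /* -> Vt3.2S */
    vst1_u32_x3(ptr, val);      /* ST1 {Vt.2S - Vt3.2S}, [Xn] */
}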

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.4S - Vt3.4S},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.4S
+val.val[1] → Vt2.4S
+val.val[0] → Vt.4S

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.8B - Vt3.8B},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.8B
+val.val[1] → Vt2.8B
+val.val[0] → Vt.8B

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.16B - Vt3.16B},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.16B
+val.val[1] → Vt2.16B
+val.val[0] → Vt.16B

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.4H - Vt3.4H},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.4H
+val.val[1] → Vt2.4H
+val.val[0] → Vt.4H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.8H - Vt3.8H},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.8H
+val.val[1] → Vt2.8H
+val.val[0] → Vt.8H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.2S - Vt3.2S},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.2S
+val.val[1] → Vt2.2S
+val.val[0] → Vt.2S

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.4S - Vt3.4S},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.4S
+val.val[1] → Vt2.4S
+val.val[0] → Vt.4S

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
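
For the 4S triple (twelve 32-bit lanes, 48 bytes), a sketch assuming the single-precision variant vst1q_f32_x3; the integer variants are analogous:

#include <arm_neon.h>

/* Store three 4x float Q registers: 12 floats (48 bytes) at ptr. */
void store_f32x4_triple(float32_t *ptr /* 12 writable floats */) {
    float32x4x3_t val;
    val.val[0] = vdupq_n_f32(1.0f); /* -> Vt.4S  */
    val.val[1] = vdupq_n_f32(2.0f); /* -> Vt2.4S */
    val.val[2] = vdupq_n_f32(3.0f); /* -> Vt3.4S */
    vst1q_f32_x3(ptr, val);         /* ST1 {Vt.4S - Vt3.4S}, [Xn] */
}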

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.4H - Vt3.4H},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.4H
+val.val[1] → Vt2.4H
+val.val[0] → Vt.4H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.8H - Vt3.8H},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.8H
+val.val[1] → Vt2.8H
+val.val[0] → Vt.8H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.2S - Vt3.2S},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.2S
+val.val[1] → Vt2.2S
+val.val[0] → Vt.2S

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.4S - Vt3.4S},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.4S
+val.val[1] → Vt2.4S
+val.val[0] → Vt.4S

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
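
As a rough C model of the MemOp_STORE path of the Operation pseudocode above (a sketch only; st1_store_model, the fixed-size regs array and the loose use of selem, ebytes and index are simplifications for illustration, not part of any real API):

#include <stdint.h>
#include <string.h>

/* One element of ebytes bytes is taken from lane `index` of each of the
   selem registers in turn, register numbers wrapping modulo 32, and
   written to consecutive memory. The return value is the final offs,
   i.e. what the optional write-back form would add to the base register. */
static uint64_t st1_store_model(uint8_t *address, uint8_t regs[32][16],
                                unsigned t, unsigned index,
                                unsigned ebytes, unsigned selem)
{
    uint64_t offs = 0;
    for (unsigned s = 0; s < selem; s++) {
        /* Mem[address + offs, ebytes] = Elem[V[t], index, esize] */
        memcpy(address + offs, &regs[t][index * ebytes], ebytes);
        offs += ebytes;
        t = (t + 1) % 32;
    }
    return offs;
}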

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.8B - Vt3.8B},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.8B
+val.val[1] → Vt2.8B
+val.val[0] → Vt.8B

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
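
A short C sketch of the 64-bit (D) register form, assuming <arm_neon.h> and taking vst1_u8_x3 as a representative intrinsic for this layout:

#include <arm_neon.h>

/* 64-bit (D) register form: 3 x 8 bytes = 24 bytes stored at ptr. */
void store_three_u8(uint8_t *ptr, uint8x8_t a, uint8x8_t b, uint8x8_t c)
{
    uint8x8x3_t val = { { a, b, c } };   /* val.val[0..2] -> Vt..Vt3.8B */
    vst1_u8_x3(ptr, val);                /* ST1 {Vt.8B - Vt3.8B}, [Xn] */
}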

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.16B - Vt3.16B},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.16B
+val.val[1] → Vt2.16B
+val.val[0] → Vt.16B

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.4H - Vt3.4H},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.4H
+val.val[1] → Vt2.4H
+val.val[0] → Vt.4H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.8H - Vt3.8H},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.8H
+val.val[1] → Vt2.8H
+val.val[0] → Vt.8H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.1D - Vt3.1D},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.1D
+val.val[1] → Vt2.1D
+val.val[0] → Vt.1D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.1D - Vt3.1D},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.1D
+val.val[1] → Vt2.1D
+val.val[0] → Vt.1D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.1D - Vt3.1D},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.1D
+val.val[1] → Vt2.1D
+val.val[0] → Vt.1D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.2D - Vt3.2D},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.2D
+val.val[1] → Vt2.2D
+val.val[0] → Vt.2D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.2D - Vt3.2D},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.2D
+val.val[1] → Vt2.2D
+val.val[0] → Vt.2D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.2D - Vt3.2D},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.2D
+val.val[1] → Vt2.2D
+val.val[0] → Vt.2D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.1D - Vt3.1D},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.1D
+val.val[1] → Vt2.1D
+val.val[0] → Vt.1D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64
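
A minimal C sketch for the 1D form, assuming an AArch64 target and <arm_neon.h>; vst1_f64_x3 is chosen here because the double-precision variant is AArch64-only, which fits the A64 note above, but it is an assumption rather than the only intrinsic that uses this layout:

#include <arm_neon.h>

/* 1D form: one 64-bit element per D register, 24 bytes in total. */
void store_three_f64(float64_t *ptr, float64x1x3_t val)
{
    vst1_f64_x3(ptr, val);   /* ST1 {Vt.1D - Vt3.1D}, [Xn] */
}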

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.2D - Vt3.2D},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[2] → Vt3.2D
+val.val[1] → Vt2.2D
+val.val[0] → Vt.2D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.8B - Vt4.8B},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.8B
+val.val[2] → Vt3.8B
+val.val[1] → Vt2.8B
+val.val[0] → Vt.8B

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
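
For the four-register form, a minimal C sketch assuming <arm_neon.h>, with vst1_s8_x4 taken as a representative of the *_x4 intrinsics that prepare val.val[0..3] as shown above:

#include <arm_neon.h>

/* Four-register variant: val.val[0..3] -> Vt..Vt4.8B, so
   4 x 8 bytes = 32 bytes are stored contiguously at ptr. */
void store_four_s8(int8_t *ptr, int8x8x4_t val)
{
    vst1_s8_x4(ptr, val);   /* ST1 {Vt.8B - Vt4.8B}, [Xn] */
}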

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.16B - Vt4.16B},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.16B
+val.val[2] → Vt3.16B
+val.val[1] → Vt2.16B
+val.val[0] → Vt.16B

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.4H - Vt4.4H},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.4H
+val.val[2] → Vt3.4H
+val.val[1] → Vt2.4H
+val.val[0] → Vt.4H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.8H - Vt4.8H},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.8H
+val.val[2] → Vt3.8H
+val.val[1] → Vt2.8H
+val.val[0] → Vt.8H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.2S - Vt4.2S},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.2S
+val.val[2] → Vt3.2S
+val.val[1] → Vt2.2S
+val.val[0] → Vt.2S

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.4S - Vt4.4S},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.4S
+val.val[2] → Vt3.4S
+val.val[1] → Vt2.4S
+val.val[0] → Vt.4S

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
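
A usage sketch for the 4S four-register form (assuming <arm_neon.h>); unlike vst4q_f32, which interleaves elements across the four registers, vst1q_f32_x4 writes each register back to back:

#include <arm_neon.h>

/* Gathers four independent Q registers into the x4 structure and stores
   4 x 16 = 64 bytes with a single instruction. */
void store_four_rows(float32_t *dst, float32x4_t r0, float32x4_t r1,
                     float32x4_t r2, float32x4_t r3)
{
    float32x4x4_t val;
    val.val[0] = r0;           /* -> Vt.4S  */
    val.val[1] = r1;           /* -> Vt2.4S */
    val.val[2] = r2;           /* -> Vt3.4S */
    val.val[3] = r3;           /* -> Vt4.4S */
    vst1q_f32_x4(dst, val);    /* ST1 {Vt.4S - Vt4.4S}, [Xn] */
}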

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.8B - Vt4.8B},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.8B
+val.val[2] → Vt3.8B
+val.val[1] → Vt2.8B
+val.val[0] → Vt.8B

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.16B - Vt4.16B},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.16B
+val.val[2] → Vt3.16B
+val.val[1] → Vt2.16B
+val.val[0] → Vt.16B

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.4H - Vt4.4H},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.4H
+val.val[2] → Vt3.4H
+val.val[1] → Vt2.4H
+val.val[0] → Vt.4H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.8H - Vt4.8H},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.8H
+val.val[2] → Vt3.8H
+val.val[1] → Vt2.8H
+val.val[0] → Vt.8H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.2S - Vt4.2S},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.2S
+val.val[2] → Vt3.2S
+val.val[1] → Vt2.2S
+val.val[0] → Vt.2S

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.4S - Vt4.4S},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.4S
+val.val[2] → Vt3.4S
+val.val[1] → Vt2.4S
+val.val[0] → Vt.4S

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.4H - Vt4.4H},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.4H
+val.val[2] → Vt3.4H
+val.val[1] → Vt2.4H
+val.val[0] → Vt.4H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.8H - Vt4.8H},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.8H
+val.val[2] → Vt3.8H
+val.val[1] → Vt2.8H
+val.val[0] → Vt.8H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.2S - Vt4.2S},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.2S
+val.val[2] → Vt3.2S
+val.val[1] → Vt2.2S
+val.val[0] → Vt.2S

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.4S - Vt4.4S},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.4S
+val.val[2] → Vt3.4S
+val.val[1] → Vt2.4S
+val.val[0] → Vt.4S

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.8B - Vt4.8B},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.8B
+val.val[2] → Vt3.8B
+val.val[1] → Vt2.8B
+val.val[0] → Vt.8B

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.16B - Vt4.16B},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.16B
+val.val[2] → Vt3.16B
+val.val[1] → Vt2.16B
+val.val[0] → Vt.16B

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.4H - Vt4.4H},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.4H
+val.val[2] → Vt3.4H
+val.val[1] → Vt2.4H
+val.val[0] → Vt.4H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.8H - Vt4.8H},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.8H
+val.val[2] → Vt3.8H
+val.val[1] → Vt2.8H
+val.val[0] → Vt.8H

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.1D - Vt4.1D},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.1D
+val.val[2] → Vt3.1D
+val.val[1] → Vt2.1D
+val.val[0] → Vt.1D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.1D - Vt4.1D},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.1D
+val.val[2] → Vt3.1D
+val.val[1] → Vt2.1D
+val.val[0] → Vt.1D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.1D - Vt4.1D},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.1D
+val.val[2] → Vt3.1D
+val.val[1] → Vt2.1D
+val.val[0] → Vt.1D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.2D - Vt4.2D},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.2D
+val.val[2] → Vt3.2D
+val.val[1] → Vt2.2D
+val.val[0] → Vt.2D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.2D - Vt4.2D},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.2D
+val.val[2] → Vt3.2D
+val.val[1] → Vt2.2D
+val.val[0] → Vt.2D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.2D - Vt4.2D},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.2D
+val.val[2] → Vt3.2D
+val.val[1] → Vt2.2D
+val.val[0] → Vt.2D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A32/A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.1D - Vt4.1D},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.1D
+val.val[2] → Vt3.1D
+val.val[1] → Vt2.1D
+val.val[0] → Vt.1D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

+

A64 Instruction

ST1 {Vt.2D - Vt4.2D},[Xn]
+

Argument Preparation

ptr → Xn 
+val.val[3] → Vt4.2D
+val.val[2] → Vt3.2D
+val.val[1] → Vt2.2D
+val.val[0] → Vt.2D

Results

void → result
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.8B - Vt2.8B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.8B → result.val[1]
+Vt.8B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
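
Under the same assumption, the LD1 two-register entries map onto the vld1_*_x2 family; vld1_s8_x2 below is an assumed concrete example, with result.val[0] and result.val[1] corresponding to the fields of the returned register pair:

// Sketch only: loads two 8B NEON registers' worth of i8 values from `src`.
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn load_pair_i8(
    src: *const i8,
) -> (core::arch::aarch64::int8x8_t, core::arch::aarch64::int8x8_t) {
    use core::arch::aarch64::vld1_s8_x2;
    // LD1 {Vt.8B - Vt2.8B},[Xn]: Vt.8B -> result.val[0], Vt2.8B -> result.val[1]
    let r = unsafe { vld1_s8_x2(src) };
    (r.0, r.1)
}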

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.16B - Vt2.16B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.16B → result.val[1]
+Vt.16B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.4H - Vt2.4H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.4H → result.val[1]
+Vt.4H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.8H - Vt2.8H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.8H → result.val[1]
+Vt.8H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.2S - Vt2.2S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.2S → result.val[1]
+Vt.2S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.4S - Vt2.4S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.4S → result.val[1]
+Vt.4S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.8B - Vt2.8B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.8B → result.val[1]
+Vt.8B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.16B - Vt2.16B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.16B → result.val[1]
+Vt.16B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.4H - Vt2.4H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.4H → result.val[1]
+Vt.4H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.8H - Vt2.8H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.8H → result.val[1]
+Vt.8H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.2S - Vt2.2S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.2S → result.val[1]
+Vt.2S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.4S - Vt2.4S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.4S → result.val[1]
+Vt.4S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.4H - Vt2.4H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.4H → result.val[1]
+Vt.4H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.8H - Vt2.8H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.8H → result.val[1]
+Vt.8H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.2S - Vt2.2S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.2S → result.val[1]
+Vt.2S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.4S - Vt2.4S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.4S → result.val[1]
+Vt.4S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.8B - Vt2.8B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.8B → result.val[1]
+Vt.8B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.16B - Vt2.16B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.16B → result.val[1]
+Vt.16B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.4H - Vt2.4H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.4H → result.val[1]
+Vt.4H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.8H - Vt2.8H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.8H → result.val[1]
+Vt.8H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.1D - Vt2.1D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.1D → result.val[1]
+Vt.1D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.1D - Vt2.1D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.1D → result.val[1]
+Vt.1D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.1D - Vt2.1D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.1D → result.val[1]
+Vt.1D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.2D - Vt2.2D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.2D → result.val[1]
+Vt.2D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+
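
A rough usage sketch for the LD1 {Vt.2D - Vt2.2D},[Xn] form documented above, assuming the ACLE vld1q_u64_x2 intrinsic (the excerpt does not show which intrinsic this entry belongs to, so this is just one representative type).

#include <arm_neon.h>

// Assumption: vld1q_u64_x2 maps onto LD1 {Vt.2D - Vt2.2D},[Xn]; four u64 values fill two Q registers.
uint64_t sum4(const uint64_t buf[4]) {
    uint64x2x2_t r = vld1q_u64_x2(buf);            // buf[0..1] -> val[0], buf[2..3] -> val[1]
    uint64x2_t s = vaddq_u64(r.val[0], r.val[1]);  // lane-wise add of the two registers
    return vgetq_lane_u64(s, 0) + vgetq_lane_u64(s, 1);
}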

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.2D - Vt2.2D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.2D → result.val[1]
+Vt.2D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.2D - Vt2.2D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.2D → result.val[1]
+Vt.2D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.1D - Vt2.1D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.1D → result.val[1]
+Vt.1D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.2D - Vt2.2D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt2.2D → result.val[1]
+Vt.2D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+
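
Since this entry is marked A64-only, a plausible instance is the double-precision form; the sketch below assumes the ACLE vld1q_f64_x2 intrinsic from <arm_neon.h> (not named in the excerpt) and recent GCC/Clang support for the _x2 loads.

#include <arm_neon.h>

// Assumption: vld1q_f64_x2 is one A64-only intrinsic behind LD1 {Vt.2D - Vt2.2D},[Xn].
float64x2_t add_halves(const double buf[4]) {
    float64x2x2_t r = vld1q_f64_x2(buf);   // buf[0..1] -> val[0], buf[2..3] -> val[1]
    return vaddq_f64(r.val[0], r.val[1]);  // element-wise sum of the two halves
}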

Supported architectures

A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.8B - Vt3.8B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.8B → result.val[2]
+Vt2.8B → result.val[1]
+Vt.8B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+
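
A short sketch of the three-register byte form above, assuming the ACLE vld1_s8_x3 intrinsic from <arm_neon.h>; the exact intrinsic for this entry is not shown in the excerpt.

#include <arm_neon.h>

// Assumption: vld1_s8_x3 lowers to LD1 {Vt.8B - Vt3.8B},[Xn]; 24 bytes spread over three D registers.
int8x8_t fold3(const int8_t buf[24]) {
    int8x8x3_t r = vld1_s8_x3(buf);        // 8 bytes per r.val[i]
    return vadd_s8(vadd_s8(r.val[0], r.val[1]), r.val[2]);
}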

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.16B - Vt3.16B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.16B → result.val[2]
+Vt2.16B → result.val[1]
+Vt.16B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+
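
For the 16-byte, three-register form above, a minimal sketch assuming the ACLE vld1q_u8_x3 intrinsic; the horizontal add used to consume the result (vaddvq_u8) is A64-only, while the load form itself is listed for v7/A32/A64.

#include <arm_neon.h>

// Assumption: vld1q_u8_x3 maps onto LD1 {Vt.16B - Vt3.16B},[Xn]; 48 bytes into three Q registers.
unsigned sum48(const uint8_t buf[48]) {
    uint8x16x3_t r = vld1q_u8_x3(buf);
    return vaddvq_u8(r.val[0]) + vaddvq_u8(r.val[1]) + vaddvq_u8(r.val[2]);
}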

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.4H - Vt3.4H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.4H → result.val[2]
+Vt2.4H → result.val[1]
+Vt.4H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+
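
A sketch of the halfword variant above, assuming the ACLE vld1_u16_x3 intrinsic from <arm_neon.h> as one plausible match for this entry.

#include <arm_neon.h>

// Assumption: vld1_u16_x3 lowers to LD1 {Vt.4H - Vt3.4H},[Xn]; twelve u16 values, four per D register.
uint16x4_t fold_4h(const uint16_t buf[12]) {
    uint16x4x3_t r = vld1_u16_x3(buf);     // buf[0..3], buf[4..7], buf[8..11]
    return vadd_u16(vadd_u16(r.val[0], r.val[1]), r.val[2]);
}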

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.8H - Vt3.8H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.8H → result.val[2]
+Vt2.8H → result.val[1]
+Vt.8H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+
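
The 128-bit halfword form above, sketched with the ACLE vld1q_s16_x3 intrinsic (one representative element type; the entry's own intrinsic name is not shown here).

#include <arm_neon.h>

// Assumption: vld1q_s16_x3 maps onto LD1 {Vt.8H - Vt3.8H},[Xn]; 24 halfwords into three Q registers.
int16x8_t fold_8h(const int16_t buf[24]) {
    int16x8x3_t r = vld1q_s16_x3(buf);
    return vaddq_s16(vaddq_s16(r.val[0], r.val[1]), r.val[2]);
}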

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.2S - Vt3.2S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.2S → result.val[2]
+Vt2.2S → result.val[1]
+Vt.2S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+
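
For the two-lane single-precision form above, a minimal sketch assuming the ACLE vld1_f32_x3 intrinsic from <arm_neon.h>; names are illustrative only.

#include <arm_neon.h>

// Assumption: vld1_f32_x3 lowers to LD1 {Vt.2S - Vt3.2S},[Xn]; six floats, two per D register.
float32x2_t fold_2s(const float buf[6]) {
    float32x2x3_t r = vld1_f32_x3(buf);    // {buf[0],buf[1]}, {buf[2],buf[3]}, {buf[4],buf[5]}
    return vadd_f32(vadd_f32(r.val[0], r.val[1]), r.val[2]);
}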

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.4S - Vt3.4S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.4S → result.val[2]
+Vt2.4S → result.val[1]
+Vt.4S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+
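
A usage sketch for the four-lane single-precision form above, assuming the ACLE vld1q_f32_x3 intrinsic; it loads twelve floats in one instruction and then sums the three rows element-wise.

#include <arm_neon.h>

// Assumption: vld1q_f32_x3 maps onto LD1 {Vt.4S - Vt3.4S},[Xn]; three rows of four floats.
void sum_rows(const float in[12], float out[4]) {
    float32x4x3_t r = vld1q_f32_x3(in);
    float32x4_t s = vaddq_f32(vaddq_f32(r.val[0], r.val[1]), r.val[2]);
    vst1q_f32(out, s);                     // store the per-lane sums
}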

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.8B - Vt3.8B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.8B → result.val[2]
+Vt2.8B → result.val[1]
+Vt.8B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.16B - Vt3.16B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.16B → result.val[2]
+Vt2.16B → result.val[1]
+Vt.16B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.4H - Vt3.4H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.4H → result.val[2]
+Vt2.4H → result.val[1]
+Vt.4H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.8H - Vt3.8H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.8H → result.val[2]
+Vt2.8H → result.val[1]
+Vt.8H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.2S - Vt3.2S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.2S → result.val[2]
+Vt2.2S → result.val[1]
+Vt.2S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.4S - Vt3.4S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.4S → result.val[2]
+Vt2.4S → result.val[1]
+Vt.4S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.4H - Vt3.4H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.4H → result.val[2]
+Vt2.4H → result.val[1]
+Vt.4H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.8H - Vt3.8H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.8H → result.val[2]
+Vt2.8H → result.val[1]
+Vt.8H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.2S - Vt3.2S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.2S → result.val[2]
+Vt2.2S → result.val[1]
+Vt.2S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.4S - Vt3.4S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.4S → result.val[2]
+Vt2.4S → result.val[1]
+Vt.4S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.8B - Vt3.8B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.8B → result.val[2]
+Vt2.8B → result.val[1]
+Vt.8B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.16B - Vt3.16B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.16B → result.val[2]
+Vt2.16B → result.val[1]
+Vt.16B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.4H - Vt3.4H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.4H → result.val[2]
+Vt2.4H → result.val[1]
+Vt.4H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.8H - Vt3.8H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.8H → result.val[2]
+Vt2.8H → result.val[1]
+Vt.8H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.1D - Vt3.1D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.1D → result.val[2]
+Vt2.1D → result.val[1]
+Vt.1D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+
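
The three-register, 64-bit-lane form above, sketched with the ACLE vld1_s64_x3 intrinsic from <arm_neon.h>; the excerpt does not name the entry's intrinsic, so this is one plausible instance.

#include <arm_neon.h>

// Assumption: vld1_s64_x3 lowers to LD1 {Vt.1D - Vt3.1D},[Xn]; three 64-bit values, one per D register.
int64_t sum3(const int64_t buf[3]) {
    int64x1x3_t r = vld1_s64_x3(buf);
    int64x1_t s = vadd_s64(vadd_s64(r.val[0], r.val[1]), r.val[2]);
    return vget_lane_s64(s, 0);
}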

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.1D - Vt3.1D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.1D → result.val[2]
+Vt2.1D → result.val[1]
+Vt.1D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.1D - Vt3.1D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.1D → result.val[2]
+Vt2.1D → result.val[1]
+Vt.1D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.2D - Vt3.2D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.2D → result.val[2]
+Vt2.2D → result.val[1]
+Vt.2D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+
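
Finally, a sketch of the three-register, 128-bit form above, assuming the ACLE vld1q_u64_x3 intrinsic (again only one representative type for an entry whose intrinsic name is not shown here).

#include <arm_neon.h>

// Assumption: vld1q_u64_x3 maps onto LD1 {Vt.2D - Vt3.2D},[Xn]; six u64 values into three Q registers.
uint64_t sum6(const uint64_t buf[6]) {
    uint64x2x3_t r = vld1q_u64_x3(buf);
    uint64x2_t s = vaddq_u64(vaddq_u64(r.val[0], r.val[1]), r.val[2]);
    return vgetq_lane_u64(s, 0) + vgetq_lane_u64(s, 1);
}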

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.2D - Vt3.2D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.2D → result.val[2]
+Vt2.2D → result.val[1]
+Vt.2D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.2D - Vt3.2D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.2D → result.val[2]
+Vt2.2D → result.val[1]
+Vt.2D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.1D - Vt3.1D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.1D → result.val[2]
+Vt2.1D → result.val[1]
+Vt.1D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.2D - Vt3.2D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt3.2D → result.val[2]
+Vt2.2D → result.val[1]
+Vt.2D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.8B - Vt4.8B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.8B → result.val[3]
+Vt3.8B → result.val[2]
+Vt2.8B → result.val[1]
+Vt.8B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64
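
For the four-register variant the only difference visible from C is the wider return structure. A minimal sketch, assuming vld1_u8_x4 is provided by <arm_neon.h>; the helper name load_four_d_regs is made up for illustration.

#include <arm_neon.h>
#include <stdint.h>

/* Loads 32 consecutive bytes; val[0]..val[3] mirror the Results mapping
   above (Vt.8B -> val[0], ..., Vt4.8B -> val[3]). */
uint8x8x4_t load_four_d_regs(const uint8_t *ptr /* -> Xn */) {
    return vld1_u8_x4(ptr);
}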

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.16B - Vt4.16B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.16B → result.val[3]
+Vt3.16B → result.val[2]
+Vt2.16B → result.val[1]
+Vt.16B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.4H - Vt4.4H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.4H → result.val[3]
+Vt3.4H → result.val[2]
+Vt2.4H → result.val[1]
+Vt.4H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.8H - Vt4.8H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.8H → result.val[3]
+Vt3.8H → result.val[2]
+Vt2.8H → result.val[1]
+Vt.8H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.2S - Vt4.2S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.2S → result.val[3]
+Vt3.2S → result.val[2]
+Vt2.2S → result.val[1]
+Vt.2S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.4S - Vt4.4S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.4S → result.val[3]
+Vt3.4S → result.val[2]
+Vt2.4S → result.val[1]
+Vt.4S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.8B - Vt4.8B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.8B → result.val[3]
+Vt3.8B → result.val[2]
+Vt2.8B → result.val[1]
+Vt.8B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.16B - Vt4.16B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.16B → result.val[3]
+Vt3.16B → result.val[2]
+Vt2.16B → result.val[1]
+Vt.16B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.4H - Vt4.4H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.4H → result.val[3]
+Vt3.4H → result.val[2]
+Vt2.4H → result.val[1]
+Vt.4H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.8H - Vt4.8H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.8H → result.val[3]
+Vt3.8H → result.val[2]
+Vt2.8H → result.val[1]
+Vt.8H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.2S - Vt4.2S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.2S → result.val[3]
+Vt3.2S → result.val[2]
+Vt2.2S → result.val[1]
+Vt.2S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.4S - Vt4.4S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.4S → result.val[3]
+Vt3.4S → result.val[2]
+Vt2.4S → result.val[1]
+Vt.4S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.4H - Vt4.4H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.4H → result.val[3]
+Vt3.4H → result.val[2]
+Vt2.4H → result.val[1]
+Vt.4H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.8H - Vt4.8H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.8H → result.val[3]
+Vt3.8H → result.val[2]
+Vt2.8H → result.val[1]
+Vt.8H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.2S - Vt4.2S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.2S → result.val[3]
+Vt3.2S → result.val[2]
+Vt2.2S → result.val[1]
+Vt.2S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.4S - Vt4.4S},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.4S → result.val[3]
+Vt3.4S → result.val[2]
+Vt2.4S → result.val[1]
+Vt.4S → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.8B - Vt4.8B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.8B → result.val[3]
+Vt3.8B → result.val[2]
+Vt2.8B → result.val[1]
+Vt.8B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.16B - Vt4.16B},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.16B → result.val[3]
+Vt3.16B → result.val[2]
+Vt2.16B → result.val[1]
+Vt.16B → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.4H - Vt4.4H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.4H → result.val[3]
+Vt3.4H → result.val[2]
+Vt2.4H → result.val[1]
+Vt.4H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.8H - Vt4.8H},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.8H → result.val[3]
+Vt3.8H → result.val[2]
+Vt2.8H → result.val[1]
+Vt.8H → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.1D - Vt4.1D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.1D → result.val[3]
+Vt3.1D → result.val[2]
+Vt2.1D → result.val[1]
+Vt.1D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.1D - Vt4.1D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.1D → result.val[3]
+Vt3.1D → result.val[2]
+Vt2.1D → result.val[1]
+Vt.1D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.1D - Vt4.1D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.1D → result.val[3]
+Vt3.1D → result.val[2]
+Vt2.1D → result.val[1]
+Vt.1D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.2D - Vt4.2D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.2D → result.val[3]
+Vt3.2D → result.val[2]
+Vt2.2D → result.val[1]
+Vt.2D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.2D - Vt4.2D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.2D → result.val[3]
+Vt3.2D → result.val[2]
+Vt2.2D → result.val[1]
+Vt.2D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

v7/A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.2D - Vt4.2D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.2D → result.val[3]
+Vt3.2D → result.val[2]
+Vt2.2D → result.val[1]
+Vt.2D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A32/A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.1D - Vt4.1D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.1D → result.val[3]
+Vt3.1D → result.val[2]
+Vt2.1D → result.val[1]
+Vt.1D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

+

A64 Instruction

LD1 {Vt.2D - Vt4.2D},[Xn]
+

Argument Preparation

ptr → Xn 

Results

Vt4.2D → result.val[3]
+Vt3.2D → result.val[2]
+Vt2.2D → result.val[1]
+Vt.2D → result.val[0]
+

Operation

+
if HaveMTEExt() then
+    SetNotTagCheckedInstruction(!wback && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+
+bits(64) address;
+bits(64) offs;
+bits(128) rval;
+bits(esize) element;
+constant integer ebytes = esize DIV 8;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+offs = Zeros();
+if replicate then
+    // load and replicate to all elements
+    for s = 0 to selem-1
+        element = Mem[address+offs, ebytes, AccType_VEC];
+        // replicate to fill 128- or 64-bit register
+        V[t] = Replicate(element, datasize DIV esize);
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+else
+    // load/store one element per register
+    for s = 0 to selem-1
+        rval = V[t];
+        if memop == MemOp_LOAD then
+            // insert into one lane of 128-bit register
+            Elem[rval, index, esize] = Mem[address+offs, ebytes, AccType_VEC];
+            V[t] = rval;
+        else // memop == MemOp_STORE
+            // extract from one lane of 128-bit register
+            Mem[address+offs, ebytes, AccType_VEC] = Elem[rval, index, esize];
+        offs = offs + ebytes;
+        t = (t + 1) MOD 32;
+
+if wback then
+    if m != 31 then
+        offs = X[m];
+    if n == 31 then
+        SP[] = address + offs;
+    else
+        X[n] = address + offs;
+

Supported architectures

A64

Description

Add Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, adds each pair of values together, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

ADDP Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[concat, 2*e, esize];
+    element2 = Elem[concat, (2*e)+1, esize];
+    Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
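
A worked example of the pairwise semantics described above, using the long-standing vpadd_s8 intrinsic from <arm_neon.h>, which targets this ADDP encoding on AArch64 (and VPADD.I8 on A32); the input values are arbitrary.

#include <arm_neon.h>
#include <stdint.h>

void addp_demo(void) {
    int8_t av[8] = {1, 2, 3, 4, 5, 6, 7, 8};        /* -> Vn.8B */
    int8_t bv[8] = {10, 20, 30, 40, 50, 60, 7, 8};  /* -> Vm.8B */
    int8x8_t a = vld1_s8(av);
    int8x8_t b = vld1_s8(bv);

    /* concat = b:a, then adjacent pairs are summed:
       r = {1+2, 3+4, 5+6, 7+8, 10+20, 30+40, 50+60, 7+8}
         = {3, 7, 11, 15, 30, 70, 110, 15}                 */
    int8x8_t r = vpadd_s8(a, b);
    (void)r;
}

Note that the low half of the destination holds the pair sums of the first source and the high half those of the second, exactly as the operand2:operand1 concatenation in the pseudocode implies.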

Description

Add Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, adds each pair of values together, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

ADDP Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[concat, 2*e, esize];
+    element2 = Elem[concat, (2*e)+1, esize];
+    Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Add Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, adds each pair of values together, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

ADDP Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[concat, 2*e, esize];
+    element2 = Elem[concat, (2*e)+1, esize];
+    Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Add Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, adds each pair of values together, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

ADDP Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[concat, 2*e, esize];
+    element2 = Elem[concat, (2*e)+1, esize];
+    Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Add Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, adds each pair of values together, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

ADDP Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[concat, 2*e, esize];
+    element2 = Elem[concat, (2*e)+1, esize];
+    Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Add Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, adds each pair of values together, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

ADDP Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[concat, 2*e, esize];
+    element2 = Elem[concat, (2*e)+1, esize];
+    Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Floating-point Add Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, adds each pair of values together, places the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.

+

A64 Instruction

FADDP Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+    Elem[result, e, esize] = FPAdd(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
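
A scalar sketch of the FADDP 2S form above, assuming default round-to-nearest so that plain f32 addition stands in for FPAdd with FPCR; the name faddp_f32x2 is illustrative only:

fn faddp_f32x2(a: [f32; 2], b: [f32; 2]) -> [f32; 2] {
    // concatenate a (Vn) then b (Vm) and add each adjacent pair
    let concat = [a[0], a[1], b[0], b[1]];
    [concat[0] + concat[1], concat[2] + concat[3]]
}

fn main() {
    assert_eq!(faddp_f32x2([1.0, 2.0], [3.0, 4.0]), [3.0, 7.0]);
}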

Description

Add Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, adds each pair of values together, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

ADDP Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[concat, 2*e, esize];
+    element2 = Elem[concat, (2*e)+1, esize];
+    Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

A64

Description

Add Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, adds each pair of values together, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

ADDP Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[concat, 2*e, esize];
+    element2 = Elem[concat, (2*e)+1, esize];
+    Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

A64

Description

Add Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, adds each pair of values together, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

ADDP Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[concat, 2*e, esize];
+    element2 = Elem[concat, (2*e)+1, esize];
+    Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

A64

Description

Add Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, adds each pair of values together, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

ADDP Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[concat, 2*e, esize];
+    element2 = Elem[concat, (2*e)+1, esize];
+    Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

A64

Description

Add Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, adds each pair of values together, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

ADDP Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[concat, 2*e, esize];
+    element2 = Elem[concat, (2*e)+1, esize];
+    Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

A64

Description

Add Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, adds each pair of values together, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

ADDP Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[concat, 2*e, esize];
+    element2 = Elem[concat, (2*e)+1, esize];
+    Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

A64

Description

Add Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, adds each pair of values together, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

ADDP Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[concat, 2*e, esize];
+    element2 = Elem[concat, (2*e)+1, esize];
+    Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

A64

Description

Add Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, adds each pair of values together, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

ADDP Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[concat, 2*e, esize];
+    element2 = Elem[concat, (2*e)+1, esize];
+    Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Add Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, adds each pair of values together, places the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.

+

A64 Instruction

FADDP Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+    Elem[result, e, esize] = FPAdd(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Add Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, adds each pair of values together, places the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.

+

A64 Instruction

FADDP Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+    Elem[result, e, esize] = FPAdd(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Add Long Pairwise. This instruction adds pairs of adjacent signed integer values from the vector in the source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

SADDLP Vd.4H,Vn.8B
+

Argument Preparation

a → Vn.8B 

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+bits(2*esize) sum;
+integer op1;
+integer op2;
+
+result = if acc then V[d] else Zeros();
+for e = 0 to elements-1
+    op1 = Int(Elem[operand, 2*e+0, esize], unsigned);
+    op2 = Int(Elem[operand, 2*e+1, esize], unsigned);
+    sum = (op1+op2)<2*esize-1:0>;
+    Elem[result, e, 2*esize] = Elem[result, e, 2*esize] + sum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
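
A scalar sketch of SADDLP Vd.4H, Vn.8B: each pair of signed bytes is widened to 16 bits before the add, so the pairwise sum cannot overflow. The helper name saddlp_i8x8 is hypothetical:

fn saddlp_i8x8(a: [i8; 8]) -> [i16; 4] {
    let mut result = [0i16; 4];
    for e in 0..4 {
        // sign-extend both halves of the pair, then add in the wider type
        result[e] = a[2 * e] as i16 + a[2 * e + 1] as i16;
    }
    result
}

fn main() {
    // 127 + 1 and -128 + -1 are representable only in the widened lanes
    assert_eq!(saddlp_i8x8([127, 1, -128, -1, 3, 4, -5, 6]), [128, -129, 7, 1]);
}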

Description

Signed Add Long Pairwise. This instruction adds pairs of adjacent signed integer values from the vector in the source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

SADDLP Vd.8H,Vn.16B
+

Argument Preparation

a → Vn.16B 

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+bits(2*esize) sum;
+integer op1;
+integer op2;
+
+result = if acc then V[d] else Zeros();
+for e = 0 to elements-1
+    op1 = Int(Elem[operand, 2*e+0, esize], unsigned);
+    op2 = Int(Elem[operand, 2*e+1, esize], unsigned);
+    sum = (op1+op2)<2*esize-1:0>;
+    Elem[result, e, 2*esize] = Elem[result, e, 2*esize] + sum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Add Long Pairwise. This instruction adds pairs of adjacent signed integer values from the vector in the source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

SADDLP Vd.2S,Vn.4H
+

Argument Preparation

a → Vn.4H 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+bits(2*esize) sum;
+integer op1;
+integer op2;
+
+result = if acc then V[d] else Zeros();
+for e = 0 to elements-1
+    op1 = Int(Elem[operand, 2*e+0, esize], unsigned);
+    op2 = Int(Elem[operand, 2*e+1, esize], unsigned);
+    sum = (op1+op2)<2*esize-1:0>;
+    Elem[result, e, 2*esize] = Elem[result, e, 2*esize] + sum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Add Long Pairwise. This instruction adds pairs of adjacent signed integer values from the vector in the source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

SADDLP Vd.4S,Vn.8H
+

Argument Preparation

a → Vn.8H 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+bits(2*esize) sum;
+integer op1;
+integer op2;
+
+result = if acc then V[d] else Zeros();
+for e = 0 to elements-1
+    op1 = Int(Elem[operand, 2*e+0, esize], unsigned);
+    op2 = Int(Elem[operand, 2*e+1, esize], unsigned);
+    sum = (op1+op2)<2*esize-1:0>;
+    Elem[result, e, 2*esize] = Elem[result, e, 2*esize] + sum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Add Long Pairwise. This instruction adds pairs of adjacent signed integer values from the vector in the source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

SADDLP Vd.1D,Vn.2S
+

Argument Preparation

a → Vn.2S 

Results

Vd.1D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+bits(2*esize) sum;
+integer op1;
+integer op2;
+
+result = if acc then V[d] else Zeros();
+for e = 0 to elements-1
+    op1 = Int(Elem[operand, 2*e+0, esize], unsigned);
+    op2 = Int(Elem[operand, 2*e+1, esize], unsigned);
+    sum = (op1+op2)<2*esize-1:0>;
+    Elem[result, e, 2*esize] = Elem[result, e, 2*esize] + sum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Add Long Pairwise. This instruction adds pairs of adjacent signed integer values from the vector in the source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

SADDLP Vd.2D,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+bits(2*esize) sum;
+integer op1;
+integer op2;
+
+result = if acc then V[d] else Zeros();
+for e = 0 to elements-1
+    op1 = Int(Elem[operand, 2*e+0, esize], unsigned);
+    op2 = Int(Elem[operand, 2*e+1, esize], unsigned);
+    sum = (op1+op2)<2*esize-1:0>;
+    Elem[result, e, 2*esize] = Elem[result, e, 2*esize] + sum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Add Long Pairwise. This instruction adds pairs of adjacent unsigned integer values from the vector in the source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

UADDLP Vd.4H,Vn.8B
+

Argument Preparation

a → Vn.8B 

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+bits(2*esize) sum;
+integer op1;
+integer op2;
+
+result = if acc then V[d] else Zeros();
+for e = 0 to elements-1
+    op1 = Int(Elem[operand, 2*e+0, esize], unsigned);
+    op2 = Int(Elem[operand, 2*e+1, esize], unsigned);
+    sum = (op1+op2)<2*esize-1:0>;
+    Elem[result, e, 2*esize] = Elem[result, e, 2*esize] + sum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
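
The UADDLP form differs from SADDLP only in treating the source elements as unsigned; a scalar sketch (uaddlp_u8x8 is an illustrative name):

fn uaddlp_u8x8(a: [u8; 8]) -> [u16; 4] {
    let mut result = [0u16; 4];
    for e in 0..4 {
        // zero-extend both bytes of the pair before adding
        result[e] = a[2 * e] as u16 + a[2 * e + 1] as u16;
    }
    result
}

fn main() {
    assert_eq!(uaddlp_u8x8([255, 255, 1, 2, 3, 4, 5, 6]), [510, 3, 7, 11]);
}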

Description

Unsigned Add Long Pairwise. This instruction adds pairs of adjacent unsigned integer values from the vector in the source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

UADDLP Vd.8H,Vn.16B
+

Argument Preparation

a → Vn.16B 

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+bits(2*esize) sum;
+integer op1;
+integer op2;
+
+result = if acc then V[d] else Zeros();
+for e = 0 to elements-1
+    op1 = Int(Elem[operand, 2*e+0, esize], unsigned);
+    op2 = Int(Elem[operand, 2*e+1, esize], unsigned);
+    sum = (op1+op2)<2*esize-1:0>;
+    Elem[result, e, 2*esize] = Elem[result, e, 2*esize] + sum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Add Long Pairwise. This instruction adds pairs of adjacent unsigned integer values from the vector in the source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

UADDLP Vd.2S,Vn.4H
+

Argument Preparation

a → Vn.4H 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+bits(2*esize) sum;
+integer op1;
+integer op2;
+
+result = if acc then V[d] else Zeros();
+for e = 0 to elements-1
+    op1 = Int(Elem[operand, 2*e+0, esize], unsigned);
+    op2 = Int(Elem[operand, 2*e+1, esize], unsigned);
+    sum = (op1+op2)<2*esize-1:0>;
+    Elem[result, e, 2*esize] = Elem[result, e, 2*esize] + sum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Add Long Pairwise. This instruction adds pairs of adjacent unsigned integer values from the vector in the source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

UADDLP Vd.4S,Vn.8H
+

Argument Preparation

a → Vn.8H 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+bits(2*esize) sum;
+integer op1;
+integer op2;
+
+result = if acc then V[d] else Zeros();
+for e = 0 to elements-1
+    op1 = Int(Elem[operand, 2*e+0, esize], unsigned);
+    op2 = Int(Elem[operand, 2*e+1, esize], unsigned);
+    sum = (op1+op2)<2*esize-1:0>;
+    Elem[result, e, 2*esize] = Elem[result, e, 2*esize] + sum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Add Long Pairwise. This instruction adds pairs of adjacent unsigned integer values from the vector in the source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

UADDLP Vd.1D,Vn.2S
+

Argument Preparation

a → Vn.2S 

Results

Vd.1D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+bits(2*esize) sum;
+integer op1;
+integer op2;
+
+result = if acc then V[d] else Zeros();
+for e = 0 to elements-1
+    op1 = Int(Elem[operand, 2*e+0, esize], unsigned);
+    op2 = Int(Elem[operand, 2*e+1, esize], unsigned);
+    sum = (op1+op2)<2*esize-1:0>;
+    Elem[result, e, 2*esize] = Elem[result, e, 2*esize] + sum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Add Long Pairwise. This instruction adds pairs of adjacent unsigned integer values from the vector in the source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

UADDLP Vd.2D,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+bits(2*esize) sum;
+integer op1;
+integer op2;
+
+result = if acc then V[d] else Zeros();
+for e = 0 to elements-1
+    op1 = Int(Elem[operand, 2*e+0, esize], unsigned);
+    op2 = Int(Elem[operand, 2*e+1, esize], unsigned);
+    sum = (op1+op2)<2*esize-1:0>;
+    Elem[result, e, 2*esize] = Elem[result, e, 2*esize] + sum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Add and Accumulate Long Pairwise. This instruction adds pairs of adjacent signed integer values from the vector in the source SIMD&FP register and accumulates the results into the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

SADALP Vd.4H,Vn.8B
+

Argument Preparation

a → Vd.4H 
+b → Vn.8B

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+bits(2*esize) sum;
+integer op1;
+integer op2;
+
+result = if acc then V[d] else Zeros();
+for e = 0 to elements-1
+    op1 = Int(Elem[operand, 2*e+0, esize], unsigned);
+    op2 = Int(Elem[operand, 2*e+1, esize], unsigned);
+    sum = (op1+op2)<2*esize-1:0>;
+    Elem[result, e, 2*esize] = Elem[result, e, 2*esize] + sum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
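
SADALP adds the widened pairwise sums into the existing destination lanes (the `acc` path in the pseudocode). A scalar sketch, with `acc` standing for Vd and `b` for Vn; the helper name is made up:

fn sadalp_i8x8(acc: [i16; 4], b: [i8; 8]) -> [i16; 4] {
    let mut result = acc;
    for e in 0..4 {
        let sum = b[2 * e] as i16 + b[2 * e + 1] as i16;
        // the pairwise sum is accumulated into the existing destination lane
        result[e] = result[e].wrapping_add(sum);
    }
    result
}

fn main() {
    assert_eq!(
        sadalp_i8x8([100, -100, 0, 5], [1, 2, -3, -4, 5, 6, 7, 8]),
        [103, -107, 11, 20]
    );
}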

Description

Signed Add and Accumulate Long Pairwise. This instruction adds pairs of adjacent signed integer values from the vector in the source SIMD&FP register and accumulates the results into the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

SADALP Vd.8H,Vn.16B
+

Argument Preparation

a → Vd.8H 
+b → Vn.16B

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+bits(2*esize) sum;
+integer op1;
+integer op2;
+
+result = if acc then V[d] else Zeros();
+for e = 0 to elements-1
+    op1 = Int(Elem[operand, 2*e+0, esize], unsigned);
+    op2 = Int(Elem[operand, 2*e+1, esize], unsigned);
+    sum = (op1+op2)<2*esize-1:0>;
+    Elem[result, e, 2*esize] = Elem[result, e, 2*esize] + sum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Add and Accumulate Long Pairwise. This instruction adds pairs of adjacent signed integer values from the vector in the source SIMD&FP register and accumulates the results into the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

SADALP Vd.2S,Vn.4H
+

Argument Preparation

a → Vd.2S 
+b → Vn.4H

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+bits(2*esize) sum;
+integer op1;
+integer op2;
+
+result = if acc then V[d] else Zeros();
+for e = 0 to elements-1
+    op1 = Int(Elem[operand, 2*e+0, esize], unsigned);
+    op2 = Int(Elem[operand, 2*e+1, esize], unsigned);
+    sum = (op1+op2)<2*esize-1:0>;
+    Elem[result, e, 2*esize] = Elem[result, e, 2*esize] + sum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Add and Accumulate Long Pairwise. This instruction adds pairs of adjacent signed integer values from the vector in the source SIMD&FP register and accumulates the results into the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

SADALP Vd.4S,Vn.8H
+

Argument Preparation

a → Vd.4S 
+b → Vn.8H

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+bits(2*esize) sum;
+integer op1;
+integer op2;
+
+result = if acc then V[d] else Zeros();
+for e = 0 to elements-1
+    op1 = Int(Elem[operand, 2*e+0, esize], unsigned);
+    op2 = Int(Elem[operand, 2*e+1, esize], unsigned);
+    sum = (op1+op2)<2*esize-1:0>;
+    Elem[result, e, 2*esize] = Elem[result, e, 2*esize] + sum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Add and Accumulate Long Pairwise. This instruction adds pairs of adjacent signed integer values from the vector in the source SIMD&FP register and accumulates the results into the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

SADALP Vd.1D,Vn.2S
+

Argument Preparation

a → Vd.1D 
+b → Vn.2S

Results

Vd.1D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+bits(2*esize) sum;
+integer op1;
+integer op2;
+
+result = if acc then V[d] else Zeros();
+for e = 0 to elements-1
+    op1 = Int(Elem[operand, 2*e+0, esize], unsigned);
+    op2 = Int(Elem[operand, 2*e+1, esize], unsigned);
+    sum = (op1+op2)<2*esize-1:0>;
+    Elem[result, e, 2*esize] = Elem[result, e, 2*esize] + sum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Add and Accumulate Long Pairwise. This instruction adds pairs of adjacent signed integer values from the vector in the source SIMD&FP register and accumulates the results into the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

SADALP Vd.2D,Vn.4S
+

Argument Preparation

a → Vd.2D 
+b → Vn.4S

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+bits(2*esize) sum;
+integer op1;
+integer op2;
+
+result = if acc then V[d] else Zeros();
+for e = 0 to elements-1
+    op1 = Int(Elem[operand, 2*e+0, esize], unsigned);
+    op2 = Int(Elem[operand, 2*e+1, esize], unsigned);
+    sum = (op1+op2)<2*esize-1:0>;
+    Elem[result, e, 2*esize] = Elem[result, e, 2*esize] + sum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Add and Accumulate Long Pairwise. This instruction adds pairs of adjacent unsigned integer values from the vector in the source SIMD&FP register and accumulates the results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

UADALP Vd.4H,Vn.8B
+

Argument Preparation

a → Vd.4H 
+b → Vn.8B

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+bits(2*esize) sum;
+integer op1;
+integer op2;
+
+result = if acc then V[d] else Zeros();
+for e = 0 to elements-1
+    op1 = Int(Elem[operand, 2*e+0, esize], unsigned);
+    op2 = Int(Elem[operand, 2*e+1, esize], unsigned);
+    sum = (op1+op2)<2*esize-1:0>;
+    Elem[result, e, 2*esize] = Elem[result, e, 2*esize] + sum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
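
UADALP is the unsigned counterpart of SADALP; the accumulation into the wider destination lanes wraps on overflow. A scalar sketch with an illustrative name:

fn uadalp_u8x8(acc: [u16; 4], b: [u8; 8]) -> [u16; 4] {
    let mut result = acc;
    for e in 0..4 {
        let sum = b[2 * e] as u16 + b[2 * e + 1] as u16;
        result[e] = result[e].wrapping_add(sum);
    }
    result
}

fn main() {
    // the last lane shows 16-bit wrap-around: 65535 + (0 + 1) -> 0
    assert_eq!(
        uadalp_u8x8([1000, 0, 7, 65535], [10, 20, 30, 40, 1, 2, 0, 1]),
        [1030, 70, 10, 0]
    );
}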

Description

Unsigned Add and Accumulate Long Pairwise. This instruction adds pairs of adjacent unsigned integer values from the vector in the source SIMD&FP register and accumulates the results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

UADALP Vd.8H,Vn.16B
+

Argument Preparation

a → Vd.8H 
+b → Vn.16B

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+bits(2*esize) sum;
+integer op1;
+integer op2;
+
+result = if acc then V[d] else Zeros();
+for e = 0 to elements-1
+    op1 = Int(Elem[operand, 2*e+0, esize], unsigned);
+    op2 = Int(Elem[operand, 2*e+1, esize], unsigned);
+    sum = (op1+op2)<2*esize-1:0>;
+    Elem[result, e, 2*esize] = Elem[result, e, 2*esize] + sum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Add and Accumulate Long Pairwise. This instruction adds pairs of adjacent unsigned integer values from the vector in the source SIMD&FP register and accumulates the results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

UADALP Vd.2S,Vn.4H
+

Argument Preparation

a → Vd.2S 
+b → Vn.4H

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+bits(2*esize) sum;
+integer op1;
+integer op2;
+
+result = if acc then V[d] else Zeros();
+for e = 0 to elements-1
+    op1 = Int(Elem[operand, 2*e+0, esize], unsigned);
+    op2 = Int(Elem[operand, 2*e+1, esize], unsigned);
+    sum = (op1+op2)<2*esize-1:0>;
+    Elem[result, e, 2*esize] = Elem[result, e, 2*esize] + sum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Add and Accumulate Long Pairwise. This instruction adds pairs of adjacent unsigned integer values from the vector in the source SIMD&FP register and accumulates the results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

UADALP Vd.4S,Vn.8H
+

Argument Preparation

a → Vd.4S 
+b → Vn.8H

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+bits(2*esize) sum;
+integer op1;
+integer op2;
+
+result = if acc then V[d] else Zeros();
+for e = 0 to elements-1
+    op1 = Int(Elem[operand, 2*e+0, esize], unsigned);
+    op2 = Int(Elem[operand, 2*e+1, esize], unsigned);
+    sum = (op1+op2)<2*esize-1:0>;
+    Elem[result, e, 2*esize] = Elem[result, e, 2*esize] + sum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Add and Accumulate Long Pairwise. This instruction adds pairs of adjacent unsigned integer values from the vector in the source SIMD&FP register and accumulates the results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

UADALP Vd.1D,Vn.2S
+

Argument Preparation

a → Vd.1D 
+b → Vn.2S

Results

Vd.1D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+bits(2*esize) sum;
+integer op1;
+integer op2;
+
+result = if acc then V[d] else Zeros();
+for e = 0 to elements-1
+    op1 = Int(Elem[operand, 2*e+0, esize], unsigned);
+    op2 = Int(Elem[operand, 2*e+1, esize], unsigned);
+    sum = (op1+op2)<2*esize-1:0>;
+    Elem[result, e, 2*esize] = Elem[result, e, 2*esize] + sum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Add and Accumulate Long Pairwise. This instruction adds pairs of adjacent unsigned integer values from the vector in the source SIMD&FP register and accumulates the results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

UADALP Vd.2D,Vn.4S
+

Argument Preparation

a → Vd.2D 
+b → Vn.4S

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+bits(2*esize) sum;
+integer op1;
+integer op2;
+
+result = if acc then V[d] else Zeros();
+for e = 0 to elements-1
+    op1 = Int(Elem[operand, 2*e+0, esize], unsigned);
+    op2 = Int(Elem[operand, 2*e+1, esize], unsigned);
+    sum = (op1+op2)<2*esize-1:0>;
+    Elem[result, e, 2*esize] = Elem[result, e, 2*esize] + sum;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Maximum Pairwise. This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the largest of each pair of signed integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SMAXP Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[concat, 2*e, esize], unsigned);
+    element2 = Int(Elem[concat, (2*e)+1, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
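
A scalar sketch of SMAXP Vd.8B: the two sources are concatenated (Vn low, Vm high) and the larger of each adjacent signed pair is kept. The helper name smaxp_i8x8 is hypothetical:

fn smaxp_i8x8(a: [i8; 8], b: [i8; 8]) -> [i8; 8] {
    let mut concat = [0i8; 16];
    concat[..8].copy_from_slice(&a);
    concat[8..].copy_from_slice(&b);
    let mut result = [0i8; 8];
    for e in 0..8 {
        // keep the larger value of each adjacent signed pair
        result[e] = concat[2 * e].max(concat[2 * e + 1]);
    }
    result
}

fn main() {
    let a = [1i8, -2, 3, 4, -5, -6, 7, 0];
    let b = [9i8, 8, -7, -6, 5, 4, -3, -2];
    assert_eq!(smaxp_i8x8(a, b), [1, 4, -5, 7, 9, -6, 5, -2]);
}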

Description

Signed Maximum Pairwise. This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the largest of each pair of signed integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SMAXP Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[concat, 2*e, esize], unsigned);
+    element2 = Int(Elem[concat, (2*e)+1, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Maximum Pairwise. This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the largest of each pair of signed integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SMAXP Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[concat, 2*e, esize], unsigned);
+    element2 = Int(Elem[concat, (2*e)+1, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Maximum Pairwise. This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the largest of each pair of unsigned integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UMAXP Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[concat, 2*e, esize], unsigned);
+    element2 = Int(Elem[concat, (2*e)+1, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
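
UMAXP follows the same shape with unsigned comparisons; a compact scalar sketch that pairs up `a` (Vn) into the low half of the result and `b` (Vm) into the high half — equivalent to indexing the concatenated vector. The name is illustrative:

fn umaxp_u8x8(a: [u8; 8], b: [u8; 8]) -> [u8; 8] {
    let mut result = [0u8; 8];
    for e in 0..4 {
        result[e] = a[2 * e].max(a[2 * e + 1]);
        result[e + 4] = b[2 * e].max(b[2 * e + 1]);
    }
    result
}

fn main() {
    let a = [1u8, 200, 3, 4, 5, 6, 7, 0];
    let b = [9u8, 8, 255, 6, 5, 4, 3, 2];
    assert_eq!(umaxp_u8x8(a, b), [200, 4, 6, 7, 9, 255, 5, 3]);
}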

Description

Unsigned Maximum Pairwise. This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the largest of each pair of unsigned integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UMAXP Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[concat, 2*e, esize], unsigned);
+    element2 = Int(Elem[concat, (2*e)+1, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Maximum Pairwise. This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the largest of each pair of unsigned integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UMAXP Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[concat, 2*e, esize], unsigned);
+    element2 = Int(Elem[concat, (2*e)+1, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Floating-point Maximum Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, writes the larger of each pair of values into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.

+

A64 Instruction

FMAXP Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+
+    if minimum then
+        Elem[result, e, esize] = FPMin(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMax(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
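
A rough scalar sketch of FMAXP 2S; note that Rust's f32::max ignores a NaN operand, whereas the FPMax used in the pseudocode follows the FPCR-controlled NaN rules, so this only models the ordinary numeric case (the name is made up):

fn fmaxp_f32x2(a: [f32; 2], b: [f32; 2]) -> [f32; 2] {
    // larger of each adjacent pair: a (Vn) feeds lane 0, b (Vm) feeds lane 1
    [a[0].max(a[1]), b[0].max(b[1])]
}

fn main() {
    assert_eq!(fmaxp_f32x2([1.5, -2.0], [0.0, 3.25]), [1.5, 3.25]);
}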

Description

Signed Maximum Pairwise. This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the largest of each pair of signed integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SMAXP Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[concat, 2*e, esize], unsigned);
+    element2 = Int(Elem[concat, (2*e)+1, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Maximum Pairwise. This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the largest of each pair of signed integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SMAXP Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[concat, 2*e, esize], unsigned);
+    element2 = Int(Elem[concat, (2*e)+1, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Maximum Pairwise. This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the largest of each pair of signed integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SMAXP Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[concat, 2*e, esize], unsigned);
+    element2 = Int(Elem[concat, (2*e)+1, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Maximum Pairwise. This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the largest of each pair of unsigned integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UMAXP Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[concat, 2*e, esize], unsigned);
+    element2 = Int(Elem[concat, (2*e)+1, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Maximum Pairwise. This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the largest of each pair of unsigned integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UMAXP Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[concat, 2*e, esize], unsigned);
+    element2 = Int(Elem[concat, (2*e)+1, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Maximum Pairwise. This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the largest of each pair of unsigned integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UMAXP Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[concat, 2*e, esize], unsigned);
+    element2 = Int(Elem[concat, (2*e)+1, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Maximum Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, writes the larger of each pair of values into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.

+

A64 Instruction

FMAXP Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+
+    if minimum then
+        Elem[result, e, esize] = FPMin(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMax(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Maximum Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, writes the larger of each pair of values into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.

+

A64 Instruction

FMAXP Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+
+    if minimum then
+        Elem[result, e, esize] = FPMin(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMax(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Minimum Pairwise. This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the smallest of each pair of signed integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SMINP Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[concat, 2*e, esize], unsigned);
+    element2 = Int(Elem[concat, (2*e)+1, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
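
SMINP mirrors SMAXP with the smaller element of each signed pair kept; a compact scalar sketch (illustrative name, not a core_arch intrinsic):

fn sminp_i8x8(a: [i8; 8], b: [i8; 8]) -> [i8; 8] {
    let mut result = [0i8; 8];
    for e in 0..4 {
        // low half pairs up `a` (Vn), high half pairs up `b` (Vm)
        result[e] = a[2 * e].min(a[2 * e + 1]);
        result[e + 4] = b[2 * e].min(b[2 * e + 1]);
    }
    result
}

fn main() {
    let a = [1i8, -2, 3, 4, -5, -6, 7, 0];
    let b = [9i8, 8, -7, -6, 5, 4, -3, -2];
    assert_eq!(sminp_i8x8(a, b), [-2, 3, -6, 0, 8, -7, 4, -3]);
}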

Description

Signed Minimum Pairwise. This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the smallest of each pair of signed integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SMINP Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[concat, 2*e, esize], unsigned);
+    element2 = Int(Elem[concat, (2*e)+1, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Signed Minimum Pairwise. This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the smallest of each pair of signed integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SMINP Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[concat, 2*e, esize], unsigned);
+    element2 = Int(Elem[concat, (2*e)+1, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Minimum Pairwise. This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the smallest of each pair of unsigned integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UMINP Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[concat, 2*e, esize], unsigned);
+    element2 = Int(Elem[concat, (2*e)+1, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+
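
A similar sketch for the unsigned 8B form, assuming the usual vpmin_u8 mapping in core::arch::aarch64 (sample values are illustrative).

#[cfg(target_arch = "aarch64")]
fn uminp_8b_sketch() {
    use core::arch::aarch64::*;
    unsafe {
        let a_vals: [u8; 8] = [10, 2, 30, 4, 50, 6, 70, 8];
        let b_vals: [u8; 8] = [9, 9, 1, 200, 3, 3, 100, 5];
        let a = vld1_u8(a_vals.as_ptr());
        let b = vld1_u8(b_vals.as_ptr());
        // Comparison is unsigned: min(1, 200) == 1, with no sign reinterpretation.
        let r = vpmin_u8(a, b);
        let mut out = [0u8; 8];
        vst1_u8(out.as_mut_ptr(), r);
        assert_eq!(out, [2, 4, 6, 8, 9, 1, 3, 5]);
    }
}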

Supported architectures

v7/A32/A64

Description

Unsigned Minimum Pairwise. This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the smallest of each pair of unsigned integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UMINP Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[concat, 2*e, esize], unsigned);
+    element2 = Int(Elem[concat, (2*e)+1, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Unsigned Minimum Pairwise. This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the smallest of each pair of unsigned integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UMINP Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[concat, 2*e, esize], unsigned);
+    element2 = Int(Elem[concat, (2*e)+1, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Floating-point Minimum Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, writes the smaller of each pair of values into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.

+

A64 Instruction

FMINP Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+
+    if minimum then
+        Elem[result, e, esize] = FPMin(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMax(element1, element2, FPCR);
+
+V[d] = result;
+
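
A short sketch of the floating-point pairwise minimum, assuming it is exposed as vpmin_f32 in core::arch::aarch64 (illustrative values).

#[cfg(target_arch = "aarch64")]
fn fminp_2s_sketch() {
    use core::arch::aarch64::*;
    unsafe {
        let a = vld1_f32([1.0f32, -2.0].as_ptr()); // Vn.2S
        let b = vld1_f32([0.5f32, 4.0].as_ptr());  // Vm.2S
        // result = [min(1.0, -2.0), min(0.5, 4.0)]
        let r = vpmin_f32(a, b);
        let mut out = [0.0f32; 2];
        vst1_f32(out.as_mut_ptr(), r);
        assert_eq!(out, [-2.0f32, 0.5]);
    }
}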

Supported architectures

v7/A32/A64

Description

Signed Minimum Pairwise. This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the smallest of each pair of signed integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SMINP Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[concat, 2*e, esize], unsigned);
+    element2 = Int(Elem[concat, (2*e)+1, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Minimum Pairwise. This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the smallest of each pair of signed integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SMINP Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[concat, 2*e, esize], unsigned);
+    element2 = Int(Elem[concat, (2*e)+1, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Minimum Pairwise. This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the smallest of each pair of signed integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SMINP Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[concat, 2*e, esize], unsigned);
+    element2 = Int(Elem[concat, (2*e)+1, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Minimum Pairwise. This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the smallest of each pair of unsigned integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UMINP Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[concat, 2*e, esize], unsigned);
+    element2 = Int(Elem[concat, (2*e)+1, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Minimum Pairwise. This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the smallest of each pair of unsigned integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UMINP Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[concat, 2*e, esize], unsigned);
+    element2 = Int(Elem[concat, (2*e)+1, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Minimum Pairwise. This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the smallest of each pair of unsigned integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UMINP Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[concat, 2*e, esize], unsigned);
+    element2 = Int(Elem[concat, (2*e)+1, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Minimum Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, writes the smaller of each pair of values into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.

+

A64 Instruction

FMINP Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+
+    if minimum then
+        Elem[result, e, esize] = FPMin(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMax(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Minimum Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, writes the smaller of each pair of values into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.

+

A64 Instruction

FMINP Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+
+    if minimum then
+        Elem[result, e, esize] = FPMin(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMax(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Maximum Number Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the largest of each pair of values into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.

+

A64 Instruction

FMAXNMP Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+
+    if minimum then
+        Elem[result, e, esize] = FPMinNum(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMaxNum(element1, element2, FPCR);
+
+V[d] = result;
+
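
The "Number" variants differ from FMAXP/FMINP in how they treat NaN: a quiet NaN paired with a number yields the number. A hedged sketch, assuming core::arch::aarch64 exposes this form as vpmaxnm_f32 (sample values are illustrative).

#[cfg(target_arch = "aarch64")]
fn fmaxnmp_2s_sketch() {
    use core::arch::aarch64::*;
    unsafe {
        let a = vld1_f32([f32::NAN, 3.0].as_ptr());
        let b = vld1_f32([1.0f32, 2.0].as_ptr());
        // FPMaxNum returns the numeric operand when the other is a quiet NaN:
        // [FPMaxNum(NaN, 3.0), FPMaxNum(1.0, 2.0)] == [3.0, 2.0]
        let r = vpmaxnm_f32(a, b);
        let mut out = [0.0f32; 2];
        vst1_f32(out.as_mut_ptr(), r);
        assert_eq!(out, [3.0f32, 2.0]);
    }
}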

Supported architectures

A64

Description

Floating-point Maximum Number Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the largest of each pair of values into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.

+

A64 Instruction

FMAXNMP Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+
+    if minimum then
+        Elem[result, e, esize] = FPMinNum(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMaxNum(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Maximum Number Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the largest of each pair of values into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.

+

A64 Instruction

FMAXNMP Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+
+    if minimum then
+        Elem[result, e, esize] = FPMinNum(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMaxNum(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Minimum Number Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the smallest of each pair of floating-point values into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.

+

A64 Instruction

FMINNMP Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+
+    if minimum then
+        Elem[result, e, esize] = FPMinNum(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMaxNum(element1, element2, FPCR);
+
+V[d] = result;
+
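
Likewise for the minimum "Number" form, assuming a vpminnm_f32 mapping (illustrative values).

#[cfg(target_arch = "aarch64")]
fn fminnmp_2s_sketch() {
    use core::arch::aarch64::*;
    unsafe {
        let a = vld1_f32([5.0f32, f32::NAN].as_ptr());
        let b = vld1_f32([-1.0f32, 8.0].as_ptr());
        // [FPMinNum(5.0, NaN), FPMinNum(-1.0, 8.0)] == [5.0, -1.0]
        let r = vpminnm_f32(a, b);
        let mut out = [0.0f32; 2];
        vst1_f32(out.as_mut_ptr(), r);
        assert_eq!(out, [5.0f32, -1.0]);
    }
}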

Supported architectures

A64

Description

Floating-point Minimum Number Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the smallest of each pair of floating-point values into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.

+

A64 Instruction

FMINNMP Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+
+    if minimum then
+        Elem[result, e, esize] = FPMinNum(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMaxNum(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Minimum Number Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the smallest of each pair of floating-point values into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.

+

A64 Instruction

FMINNMP Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+
+    if minimum then
+        Elem[result, e, esize] = FPMinNum(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMaxNum(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Add Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, adds each pair of values together, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

ADDP Dd,Vn.2D
+

Argument Preparation

a → Vn.2D 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[concat, 2*e, esize];
+    element2 = Elem[concat, (2*e)+1, esize];
+    Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+
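
In this Dd form both halves of the pair come from the single source vector, so the instruction reduces the two doubleword lanes to one scalar. A sketch assuming the vpaddd_s64 mapping in core::arch::aarch64 (values are illustrative).

#[cfg(target_arch = "aarch64")]
fn addp_d_sketch() {
    use core::arch::aarch64::*;
    unsafe {
        let v = vld1q_s64([40i64, 2].as_ptr()); // Vn.2D
        // Both 64-bit lanes of the single source are summed into a scalar D register.
        let sum: i64 = vpaddd_s64(v);
        assert_eq!(sum, 42);
    }
}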

Supported architectures

A64

Description

Add Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, adds each pair of values together, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

ADDP Dd,Vn.2D
+

Argument Preparation

a → Vn.2D 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[concat, 2*e, esize];
+    element2 = Elem[concat, (2*e)+1, esize];
+    Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Add Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, adds each pair of values together, places the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.

+

A64 Instruction

FADDP Sd,Vn.2S
+

Argument Preparation

a → Vn.2S 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+    Elem[result, e, esize] = FPAdd(element1, element2, FPCR);
+
+V[d] = result;
+
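
A sketch of the scalar Sd form, which adds the two single-precision lanes of one source vector; the vpadds_f32 mapping and the values are assumed for illustration.

#[cfg(target_arch = "aarch64")]
fn faddp_s_sketch() {
    use core::arch::aarch64::*;
    unsafe {
        let v = vld1_f32([1.5f32, 2.25].as_ptr()); // Vn.2S
        // The two single-precision lanes are added into one scalar result.
        let sum: f32 = vpadds_f32(v);
        assert_eq!(sum, 3.75f32);
    }
}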

Supported architectures

A64

Description

Floating-point Add Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, adds each pair of values together, places the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.

+

A64 Instruction

FADDP Dd,Vn.2D
+

Argument Preparation

a → Vn.2D 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+    Elem[result, e, esize] = FPAdd(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Maximum Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, writes the larger of each pair of values into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.

+

A64 Instruction

FMAXP Sd,Vn.2S
+

Argument Preparation

a → Vn.2S 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+
+    if minimum then
+        Elem[result, e, esize] = FPMin(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMax(element1, element2, FPCR);
+
+V[d] = result;
+
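
A sketch of the scalar maximum form, assuming the vpmaxs_f32 mapping (illustrative values).

#[cfg(target_arch = "aarch64")]
fn fmaxp_s_sketch() {
    use core::arch::aarch64::*;
    unsafe {
        let v = vld1_f32([-1.0f32, 4.0].as_ptr()); // Vn.2S
        // The larger of the two lanes is written to the scalar destination.
        let max: f32 = vpmaxs_f32(v);
        assert_eq!(max, 4.0f32);
    }
}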

Supported architectures

A64

Description

Floating-point Maximum Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, writes the larger of each pair of values into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.

+

A64 Instruction

FMAXP Dd,Vn.2D
+

Argument Preparation

a → Vn.2D 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+
+    if minimum then
+        Elem[result, e, esize] = FPMin(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMax(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Minimum Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, writes the smaller of each pair of values into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.

+

A64 Instruction

FMINP Sd,Vn.2S
+

Argument Preparation

a → Vn.2S 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+
+    if minimum then
+        Elem[result, e, esize] = FPMin(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMax(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Minimum Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, writes the smaller of each pair of values into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.

+

A64 Instruction

FMINP Dd,Vn.2D
+

Argument Preparation

a → Vn.2D 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+
+    if minimum then
+        Elem[result, e, esize] = FPMin(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMax(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Maximum Number Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the largest of each pair of values into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.

+

A64 Instruction

FMAXNMP Sd,Vn.2S
+

Argument Preparation

a → Vn.2S 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+
+    if minimum then
+        Elem[result, e, esize] = FPMinNum(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMaxNum(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Maximum Number Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the largest of each pair of values into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.

+

A64 Instruction

FMAXNMP Dd,Vn.2D
+

Argument Preparation

a → Vn.2D 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+
+    if minimum then
+        Elem[result, e, esize] = FPMinNum(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMaxNum(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Minimum Number Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the smallest of each pair of floating-point values into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.

+

A64 Instruction

FMINNMP Sd,Vn.2S
+

Argument Preparation

a → Vn.2S 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+
+    if minimum then
+        Elem[result, e, esize] = FPMinNum(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMaxNum(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Minimum Number Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the smallest of each pair of floating-point values into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.

+

A64 Instruction

FMINNMP Dd,Vn.2D
+

Argument Preparation

a → Vn.2D 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+
+    if minimum then
+        Elem[result, e, esize] = FPMinNum(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMaxNum(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Add across Vector. This instruction adds every vector element in the source SIMD&FP register together, and writes the scalar result to the destination SIMD&FP register.

+

A64 Instruction

ADDV Bd,Vn.8B
+

Argument Preparation

a → Vn.8B 

Results

Bd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+V[d] = Reduce(ReduceOp_ADD, operand, esize);
+
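
A sketch of the add-across reduction, assuming the usual vaddv_s8 mapping in core::arch::aarch64 (illustrative values).

#[cfg(target_arch = "aarch64")]
fn addv_8b_sketch() {
    use core::arch::aarch64::*;
    unsafe {
        let v = vld1_s8([1i8, 2, 3, 4, 5, 6, 7, 8].as_ptr()); // Vn.8B
        // All eight byte lanes are summed into a single byte-sized scalar
        // (the addition wraps modulo 2^8 if it overflows).
        let sum: i8 = vaddv_s8(v);
        assert_eq!(sum, 36);
    }
}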

Supported architectures

A64

Description

Add across Vector. This instruction adds every vector element in the source SIMD&FP register together, and writes the scalar result to the destination SIMD&FP register.

+

A64 Instruction

ADDV Bd,Vn.16B
+

Argument Preparation

a → Vn.16B 

Results

Bd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+V[d] = Reduce(ReduceOp_ADD, operand, esize);
+

Supported architectures

A64

Description

Add across Vector. This instruction adds every vector element in the source SIMD&FP register together, and writes the scalar result to the destination SIMD&FP register.

+

A64 Instruction

ADDV Hd,Vn.4H
+

Argument Preparation

a → Vn.4H 

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+V[d] = Reduce(ReduceOp_ADD, operand, esize);
+

Supported architectures

A64

Description

Add across Vector. This instruction adds every vector element in the source SIMD&FP register together, and writes the scalar result to the destination SIMD&FP register.

+

A64 Instruction

ADDV Hd,Vn.8H
+

Argument Preparation

a → Vn.8H 

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+V[d] = Reduce(ReduceOp_ADD, operand, esize);
+

Supported architectures

A64

Description

Add Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, adds each pair of values together, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

ADDP Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+a → Vm.2S

Results

Vd.S[0] → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[concat, 2*e, esize];
+    element2 = Elem[concat, (2*e)+1, esize];
+    Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+
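
As the argument mapping above shows (the same input feeds Vn and Vm), the 2S add-across is lowered as an ADDP of a register with itself, and the scalar is read from lane 0. A sketch assuming the vaddv_s32 mapping; the values are illustrative.

#[cfg(target_arch = "aarch64")]
fn addv_2s_sketch() {
    use core::arch::aarch64::*;
    unsafe {
        let v = vld1_s32([40i32, 2].as_ptr());
        // Typically lowered as `ADDP Vd.2S, Vn.2S, Vn.2S`; lane 0 of Vd holds the sum.
        let sum: i32 = vaddv_s32(v);
        assert_eq!(sum, 42);
    }
}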

Supported architectures

A64

Description

Add across Vector. This instruction adds every vector element in the source SIMD&FP register together, and writes the scalar result to the destination SIMD&FP register.

+

A64 Instruction

ADDV Sd,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+V[d] = Reduce(ReduceOp_ADD, operand, esize);
+

Supported architectures

A64

Description

Add Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, adds each pair of values together, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

ADDP Dd,Vn.2D
+

Argument Preparation

a → Vn.2D 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[concat, 2*e, esize];
+    element2 = Elem[concat, (2*e)+1, esize];
+    Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

A64

Description

Add across Vector. This instruction adds every vector element in the source SIMD&FP register together, and writes the scalar result to the destination SIMD&FP register.

+

A64 Instruction

ADDV Bd,Vn.8B
+

Argument Preparation

a → Vn.8B 

Results

Bd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+V[d] = Reduce(ReduceOp_ADD, operand, esize);
+

Supported architectures

A64

Description

Add across Vector. This instruction adds every vector element in the source SIMD&FP register together, and writes the scalar result to the destination SIMD&FP register.

+

A64 Instruction

ADDV Bd,Vn.16B
+

Argument Preparation

a → Vn.16B 

Results

Bd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+V[d] = Reduce(ReduceOp_ADD, operand, esize);
+

Supported architectures

A64

Description

Add across Vector. This instruction adds every vector element in the source SIMD&FP register together, and writes the scalar result to the destination SIMD&FP register.

+

A64 Instruction

ADDV Hd,Vn.4H
+

Argument Preparation

a → Vn.4H 

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+V[d] = Reduce(ReduceOp_ADD, operand, esize);
+

Supported architectures

A64

Description

Add across Vector. This instruction adds every vector element in the source SIMD&FP register together, and writes the scalar result to the destination SIMD&FP register.

+

A64 Instruction

ADDV Hd,Vn.8H
+

Argument Preparation

a → Vn.8H 

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+V[d] = Reduce(ReduceOp_ADD, operand, esize);
+

Supported architectures

A64

Description

Add Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, adds each pair of values together, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

ADDP Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+a → Vm.2S

Results

Vd.S[0] → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[concat, 2*e, esize];
+    element2 = Elem[concat, (2*e)+1, esize];
+    Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

A64

Description

Add across Vector. This instruction adds every vector element in the source SIMD&FP register together, and writes the scalar result to the destination SIMD&FP register.

+

A64 Instruction

ADDV Sd,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+V[d] = Reduce(ReduceOp_ADD, operand, esize);
+

Supported architectures

A64

Description

Add Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, adds each pair of values together, places the result into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

ADDP Dd,Vn.2D
+

Argument Preparation

a → Vn.2D 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[concat, 2*e, esize];
+    element2 = Elem[concat, (2*e)+1, esize];
+    Elem[result, e, esize] = element1 + element2;
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Add Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, adds each pair of values together, places the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.

+

A64 Instruction

FADDP Sd,Vn.2S
+

Argument Preparation

a → Vn.2S 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+    Elem[result, e, esize] = FPAdd(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Add Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, adds each pair of values together, places the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.

+

A64 Instruction

FADDP Vt.4S,Vn.4S,Vm.4S
+FADDP Sd,Vt.2S
+

Argument Preparation

a → Vn.4S 
+a → Vm.4S

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+    Elem[result, e, esize] = FPAdd(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Add Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, adds each pair of values together, places the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.

+

A64 Instruction

FADDP Dd,Vn.2D
+

Argument Preparation

a → Vn.2D 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+    Elem[result, e, esize] = FPAdd(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Add Long across Vector. This instruction adds every vector element in the source SIMD&FP register together, and writes the scalar result to the destination SIMD&FP register. The destination scalar is twice as long as the source vector elements. All the values in this instruction are signed integer values.

+

A64 Instruction

SADDLV Hd,Vn.8B
+

Argument Preparation

a → Vn.8B 

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+integer sum;
+
+sum = Int(Elem[operand, 0, esize], unsigned);
+for e = 1 to elements-1
+    sum = sum + Int(Elem[operand, e, esize], unsigned);
+
+V[d] = sum<2*esize-1:0>;
+
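
A sketch of the widening reduction, assuming the vaddlv_s8 mapping; the values are chosen to show that the i16 result can hold a sum that would overflow the i8 element type.

#[cfg(target_arch = "aarch64")]
fn saddlv_8b_sketch() {
    use core::arch::aarch64::*;
    unsafe {
        let v = vld1_s8([100i8, 100, 100, 100, 100, 100, 100, 100].as_ptr());
        // The sum (800) does not fit in an i8; the widened i16 result holds it.
        let sum: i16 = vaddlv_s8(v);
        assert_eq!(sum, 800);
    }
}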

Supported architectures

A64

Description

Signed Add Long across Vector. This instruction adds every vector element in the source SIMD&FP register together, and writes the scalar result to the destination SIMD&FP register. The destination scalar is twice as long as the source vector elements. All the values in this instruction are signed integer values.

+

A64 Instruction

SADDLV Hd,Vn.16B
+

Argument Preparation

a → Vn.16B 

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+integer sum;
+
+sum = Int(Elem[operand, 0, esize], unsigned);
+for e = 1 to elements-1
+    sum = sum + Int(Elem[operand, e, esize], unsigned);
+
+V[d] = sum<2*esize-1:0>;
+

Supported architectures

A64

Description

Signed Add Long across Vector. This instruction adds every vector element in the source SIMD&FP register together, and writes the scalar result to the destination SIMD&FP register. The destination scalar is twice as long as the source vector elements. All the values in this instruction are signed integer values.

+

A64 Instruction

SADDLV Sd,Vn.4H
+

Argument Preparation

a → Vn.4H 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+integer sum;
+
+sum = Int(Elem[operand, 0, esize], unsigned);
+for e = 1 to elements-1
+    sum = sum + Int(Elem[operand, e, esize], unsigned);
+
+V[d] = sum<2*esize-1:0>;
+

Supported architectures

A64

Description

Signed Add Long across Vector. This instruction adds every vector element in the source SIMD&FP register together, and writes the scalar result to the destination SIMD&FP register. The destination scalar is twice as long as the source vector elements. All the values in this instruction are signed integer values.

+

A64 Instruction

SADDLV Sd,Vn.8H
+

Argument Preparation

a → Vn.8H 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+integer sum;
+
+sum = Int(Elem[operand, 0, esize], unsigned);
+for e = 1 to elements-1
+    sum = sum + Int(Elem[operand, e, esize], unsigned);
+
+V[d] = sum<2*esize-1:0>;
+

Supported architectures

A64

Description

Signed Add Long Pairwise. This instruction adds pairs of adjacent signed integer values from the vector in the source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

SADDLP Vd.1D,Vn.2S
+

Argument Preparation

a → Vn.2S 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+bits(2*esize) sum;
+integer op1;
+integer op2;
+
+result = if acc then V[d] else Zeros();
+for e = 0 to elements-1
+    op1 = Int(Elem[operand, 2*e+0, esize], unsigned);
+    op2 = Int(Elem[operand, 2*e+1, esize], unsigned);
+    sum = (op1+op2)<2*esize-1:0>;
+    Elem[result, e, 2*esize] = Elem[result, e, 2*esize] + sum;
+
+V[d] = result;
+
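
A sketch of the pairwise widening add, assuming the vpaddl_s32 mapping; the values are picked so that the 32-bit sum would wrap without widening.

#[cfg(target_arch = "aarch64")]
fn saddlp_1d_sketch() {
    use core::arch::aarch64::*;
    unsafe {
        let v = vld1_s32([i32::MAX, 1].as_ptr()); // Vn.2S
        // The adjacent pair is widened before the add, so the sum cannot wrap.
        let r: int64x1_t = vpaddl_s32(v);
        let mut out = [0i64; 1];
        vst1_s64(out.as_mut_ptr(), r);
        assert_eq!(out[0], i64::from(i32::MAX) + 1);
    }
}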

Supported architectures

A64

Description

Signed Add Long across Vector. This instruction adds every vector element in the source SIMD&FP register together, and writes the scalar result to the destination SIMD&FP register. The destination scalar is twice as long as the source vector elements. All the values in this instruction are signed integer values.

+

A64 Instruction

SADDLV Dd,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+integer sum;
+
+sum = Int(Elem[operand, 0, esize], unsigned);
+for e = 1 to elements-1
+    sum = sum + Int(Elem[operand, e, esize], unsigned);
+
+V[d] = sum<2*esize-1:0>;
+

Supported architectures

A64

Description

Unsigned sum Long across Vector. This instruction adds every vector element in the source SIMD&FP register together, and writes the scalar result to the destination SIMD&FP register. The destination scalar is twice as long as the source vector elements. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UADDLV Hd,Vn.8B
+

Argument Preparation

a → Vn.8B 

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+integer sum;
+
+sum = Int(Elem[operand, 0, esize], unsigned);
+for e = 1 to elements-1
+    sum = sum + Int(Elem[operand, e, esize], unsigned);
+
+V[d] = sum<2*esize-1:0>;
+

Supported architectures

A64

Description

Unsigned sum Long across Vector. This instruction adds every vector element in the source SIMD&FP register together, and writes the scalar result to the destination SIMD&FP register. The destination scalar is twice as long as the source vector elements. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UADDLV Hd,Vn.16B
+

Argument Preparation

a → Vn.16B 

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+integer sum;
+
+sum = Int(Elem[operand, 0, esize], unsigned);
+for e = 1 to elements-1
+    sum = sum + Int(Elem[operand, e, esize], unsigned);
+
+V[d] = sum<2*esize-1:0>;
+

Supported architectures

A64

Description

Unsigned sum Long across Vector. This instruction adds every vector element in the source SIMD&FP register together, and writes the scalar result to the destination SIMD&FP register. The destination scalar is twice as long as the source vector elements. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UADDLV Sd,Vn.4H
+

Argument Preparation

a → Vn.4H 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+integer sum;
+
+sum = Int(Elem[operand, 0, esize], unsigned);
+for e = 1 to elements-1
+    sum = sum + Int(Elem[operand, e, esize], unsigned);
+
+V[d] = sum<2*esize-1:0>;
+

Supported architectures

A64

Description

Unsigned sum Long across Vector. This instruction adds every vector element in the source SIMD&FP register together, and writes the scalar result to the destination SIMD&FP register. The destination scalar is twice as long as the source vector elements. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UADDLV Sd,Vn.8H
+

Argument Preparation

a → Vn.8H 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+integer sum;
+
+sum = Int(Elem[operand, 0, esize], unsigned);
+for e = 1 to elements-1
+    sum = sum + Int(Elem[operand, e, esize], unsigned);
+
+V[d] = sum<2*esize-1:0>;
+

Supported architectures

A64

Description

Unsigned Add Long Pairwise. This instruction adds pairs of adjacent unsigned integer values from the vector in the source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

+

A64 Instruction

UADDLP Vd.1D,Vn.2S
+

Argument Preparation

a → Vn.2S 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+
+bits(2*esize) sum;
+integer op1;
+integer op2;
+
+result = if acc then V[d] else Zeros();
+for e = 0 to elements-1
+    op1 = Int(Elem[operand, 2*e+0, esize], unsigned);
+    op2 = Int(Elem[operand, 2*e+1, esize], unsigned);
+    sum = (op1+op2)<2*esize-1:0>;
+    Elem[result, e, 2*esize] = Elem[result, e, 2*esize] + sum;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned sum Long across Vector. This instruction adds every vector element in the source SIMD&FP register together, and writes the scalar result to the destination SIMD&FP register. The destination scalar is twice as long as the source vector elements. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UADDLV Dd,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+integer sum;
+
+sum = Int(Elem[operand, 0, esize], unsigned);
+for e = 1 to elements-1
+    sum = sum + Int(Elem[operand, e, esize], unsigned);
+
+V[d] = sum<2*esize-1:0>;
+

Supported architectures

A64

Description

Signed Maximum across Vector. This instruction compares all the vector elements in the source SIMD&FP register, and writes the largest of the values as a scalar to the destination SIMD&FP register. All the values in this instruction are signed integer values.

+

A64 Instruction

SMAXV Bd,Vn.8B
+

Argument Preparation

a → Vn.8B 

Results

Bd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+integer maxmin;
+integer element;
+
+maxmin = Int(Elem[operand, 0, esize], unsigned);
+for e = 1 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned);
+    maxmin = if min then Min(maxmin, element) else Max(maxmin, element);
+
+V[d] = maxmin<esize-1:0>;
+
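
A sketch of the signed maximum reduction, assuming the vmaxv_s8 mapping (illustrative values).

#[cfg(target_arch = "aarch64")]
fn smaxv_8b_sketch() {
    use core::arch::aarch64::*;
    unsafe {
        let v = vld1_s8([-1i8, -2, 7, 3, 0, -128, 5, 6].as_ptr()); // Vn.8B
        // Signed comparison across all eight lanes.
        let max: i8 = vmaxv_s8(v);
        assert_eq!(max, 7);
    }
}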

Supported architectures

A64

Description

Signed Maximum across Vector. This instruction compares all the vector elements in the source SIMD&FP register, and writes the largest of the values as a scalar to the destination SIMD&FP register. All the values in this instruction are signed integer values.

+

A64 Instruction

SMAXV Bd,Vn.16B
+

Argument Preparation

a → Vn.16B 

Results

Bd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+integer maxmin;
+integer element;
+
+maxmin = Int(Elem[operand, 0, esize], unsigned);
+for e = 1 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned);
+    maxmin = if min then Min(maxmin, element) else Max(maxmin, element);
+
+V[d] = maxmin<esize-1:0>;
+

Supported architectures

A64

Description

Signed Maximum across Vector. This instruction compares all the vector elements in the source SIMD&FP register, and writes the largest of the values as a scalar to the destination SIMD&FP register. All the values in this instruction are signed integer values.

+

A64 Instruction

SMAXV Hd,Vn.4H
+

Argument Preparation

a → Vn.4H 

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+integer maxmin;
+integer element;
+
+maxmin = Int(Elem[operand, 0, esize], unsigned);
+for e = 1 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned);
+    maxmin = if min then Min(maxmin, element) else Max(maxmin, element);
+
+V[d] = maxmin<esize-1:0>;
+

Supported architectures

A64

Description

Signed Maximum across Vector. This instruction compares all the vector elements in the source SIMD&FP register, and writes the largest of the values as a scalar to the destination SIMD&FP register. All the values in this instruction are signed integer values.

+

A64 Instruction

SMAXV Hd,Vn.8H
+

Argument Preparation

a → Vn.8H 

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+integer maxmin;
+integer element;
+
+maxmin = Int(Elem[operand, 0, esize], unsigned);
+for e = 1 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned);
+    maxmin = if min then Min(maxmin, element) else Max(maxmin, element);
+
+V[d] = maxmin<esize-1:0>;
+

Supported architectures

A64

Description

Signed Maximum Pairwise. This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the largest of each pair of signed integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SMAXP Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+a → Vm.2S

Results

Vd.S[0] → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[concat, 2*e, esize], unsigned);
+    element2 = Int(Elem[concat, (2*e)+1, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+
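
A scalar sketch of the pairwise form, keeping the concat = operand2:operand1 lane order from the pseudocode (the helper name is illustrative only):

fn smaxp_i32(a: [i32; 2], b: [i32; 2]) -> [i32; 2] {
    // operand1 (a) supplies the lowest-numbered lanes of the concatenation.
    let concat = [a[0], a[1], b[0], b[1]];
    [concat[0].max(concat[1]), concat[2].max(concat[3])]
}

fn main() {
    // The mapping above passes the same value for Vn and Vm and keeps lane 0,
    // which is then simply the larger of a's two lanes.
    let a = [3, 9];
    assert_eq!(smaxp_i32(a, a)[0], 9);
}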

Supported architectures

A64

Description

Signed Maximum across Vector. This instruction compares all the vector elements in the source SIMD&FP register, and writes the largest of the values as a scalar to the destination SIMD&FP register. All the values in this instruction are signed integer values.

+

A64 Instruction

SMAXV Sd,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+integer maxmin;
+integer element;
+
+maxmin = Int(Elem[operand, 0, esize], unsigned);
+for e = 1 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned);
+    maxmin = if min then Min(maxmin, element) else Max(maxmin, element);
+
+V[d] = maxmin<esize-1:0>;
+

Supported architectures

A64

Description

Unsigned Maximum across Vector. This instruction compares all the vector elements in the source SIMD&FP register, and writes the largest of the values as a scalar to the destination SIMD&FP register. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UMAXV Bd,Vn.8B
+

Argument Preparation

a → Vn.8B 

Results

Bd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+integer maxmin;
+integer element;
+
+maxmin = Int(Elem[operand, 0, esize], unsigned);
+for e = 1 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned);
+    maxmin = if min then Min(maxmin, element) else Max(maxmin, element);
+
+V[d] = maxmin<esize-1:0>;
+
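
The unsigned variant follows the same shape as SMAXV; a brief sketch for the Bd,Vn.8B form (illustrative helper only):

fn umaxv_u8(lanes: [u8; 8]) -> u8 {
    // Identical loop to the signed case, but over unsigned lanes.
    let mut maxmin = lanes[0];
    for &element in &lanes[1..] {
        maxmin = maxmin.max(element);
    }
    maxmin
}

fn main() {
    assert_eq!(umaxv_u8([0, 200, 17, 255, 3, 4, 5, 6]), 255);
}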

Supported architectures

A64

Description

Unsigned Maximum across Vector. This instruction compares all the vector elements in the source SIMD&FP register, and writes the largest of the values as a scalar to the destination SIMD&FP register. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UMAXV Bd,Vn.16B
+

Argument Preparation

a → Vn.16B 

Results

Bd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+integer maxmin;
+integer element;
+
+maxmin = Int(Elem[operand, 0, esize], unsigned);
+for e = 1 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned);
+    maxmin = if min then Min(maxmin, element) else Max(maxmin, element);
+
+V[d] = maxmin<esize-1:0>;
+

Supported architectures

A64

Description

Unsigned Maximum across Vector. This instruction compares all the vector elements in the source SIMD&FP register, and writes the largest of the values as a scalar to the destination SIMD&FP register. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UMAXV Hd,Vn.4H
+

Argument Preparation

a → Vn.4H 

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+integer maxmin;
+integer element;
+
+maxmin = Int(Elem[operand, 0, esize], unsigned);
+for e = 1 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned);
+    maxmin = if min then Min(maxmin, element) else Max(maxmin, element);
+
+V[d] = maxmin<esize-1:0>;
+

Supported architectures

A64

Description

Unsigned Maximum across Vector. This instruction compares all the vector elements in the source SIMD&FP register, and writes the largest of the values as a scalar to the destination SIMD&FP register. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UMAXV Hd,Vn.8H
+

Argument Preparation

a → Vn.8H 

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+integer maxmin;
+integer element;
+
+maxmin = Int(Elem[operand, 0, esize], unsigned);
+for e = 1 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned);
+    maxmin = if min then Min(maxmin, element) else Max(maxmin, element);
+
+V[d] = maxmin<esize-1:0>;
+

Supported architectures

A64

Description

Unsigned Maximum Pairwise. This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the largest of each pair of unsigned integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UMAXP Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+a → Vm.2S

Results

Vd.S[0] → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[concat, 2*e, esize], unsigned);
+    element2 = Int(Elem[concat, (2*e)+1, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Maximum across Vector. This instruction compares all the vector elements in the source SIMD&FP register, and writes the largest of the values as a scalar to the destination SIMD&FP register. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UMAXV Sd,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+integer maxmin;
+integer element;
+
+maxmin = Int(Elem[operand, 0, esize], unsigned);
+for e = 1 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned);
+    maxmin = if min then Min(maxmin, element) else Max(maxmin, element);
+
+V[d] = maxmin<esize-1:0>;
+

Supported architectures

A64

Description

Floating-point Maximum Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, writes the larger of each pair of values into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.

+

A64 Instruction

FMAXP Sd,Vn.2S
+

Argument Preparation

a → Vn.2S 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+
+    if minimum then
+        Elem[result, e, esize] = FPMin(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMax(element1, element2, FPCR);
+
+V[d] = result;
+
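
A rough scalar model of the Sd,Vn.2S form, under the assumption that neither lane is NaN (the FPMax rules for NaNs and signed zeros are not reproduced, and the helper name is made up):

fn fmaxp_f32(a: [f32; 2]) -> f32 {
    // Assumes NaN-free input; FPMax in the pseudocode has its own NaN and
    // signed-zero handling that f32::max does not match exactly.
    a[0].max(a[1])
}

fn main() {
    assert_eq!(fmaxp_f32([1.5, -2.0]), 1.5);
}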

Supported architectures

A64

Description

Floating-point Maximum across Vector. This instruction compares all the vector elements in the source SIMD&FP register, and writes the largest of the values as a scalar to the destination SIMD&FP register. All the values in this instruction are floating-point values.

+

A64 Instruction

FMAXV Sd,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+V[d] = Reduce(ReduceOp_FMAX, operand, esize);
+
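
Assuming NaN-free input, the across-vector floating-point maximum reduces to a plain fold (sketch only, hypothetical helper name):

fn fmaxv_f32(lanes: [f32; 4]) -> f32 {
    // Reduce(ReduceOp_FMAX, ...) applies FPMax pairwise; a left fold gives the
    // same value when no lane is NaN.
    lanes.iter().copied().fold(lanes[0], f32::max)
}

fn main() {
    assert_eq!(fmaxv_f32([0.5, 4.25, -1.0, 2.0]), 4.25);
}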

Supported architectures

A64

Description

Floating-point Maximum Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, writes the larger of each pair of values into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.

+

A64 Instruction

FMAXP Dd,Vn.2D
+

Argument Preparation

a → Vn.2D 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+
+    if minimum then
+        Elem[result, e, esize] = FPMin(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMax(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Minimum across Vector. This instruction compares all the vector elements in the source SIMD&FP register, and writes the smallest of the values as a scalar to the destination SIMD&FP register. All the values in this instruction are signed integer values.

+

A64 Instruction

SMINV Bd,Vn.8B
+

Argument Preparation

a → Vn.8B 

Results

Bd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+integer maxmin;
+integer element;
+
+maxmin = Int(Elem[operand, 0, esize], unsigned);
+for e = 1 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned);
+    maxmin = if min then Min(maxmin, element) else Max(maxmin, element);
+
+V[d] = maxmin<esize-1:0>;
+
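
The minimum reductions mirror the maximum ones with Min selected; for completeness, a one-line sketch of the Bd,Vn.8B form (illustrative name):

fn sminv_i8(lanes: [i8; 8]) -> i8 {
    // Same reduction as SMAXV, with Min in place of Max.
    lanes.iter().copied().fold(lanes[0], |acc, e| acc.min(e))
}

fn main() {
    assert_eq!(sminv_i8([-128, -1, 0, 5, 3, -7, 127, 2]), -128);
}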

Supported architectures

A64

Description

Signed Minimum across Vector. This instruction compares all the vector elements in the source SIMD&FP register, and writes the smallest of the values as a scalar to the destination SIMD&FP register. All the values in this instruction are signed integer values.

+

A64 Instruction

SMINV Bd,Vn.16B
+

Argument Preparation

a → Vn.16B 

Results

Bd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+integer maxmin;
+integer element;
+
+maxmin = Int(Elem[operand, 0, esize], unsigned);
+for e = 1 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned);
+    maxmin = if min then Min(maxmin, element) else Max(maxmin, element);
+
+V[d] = maxmin<esize-1:0>;
+

Supported architectures

A64

Description

Signed Minimum across Vector. This instruction compares all the vector elements in the source SIMD&FP register, and writes the smallest of the values as a scalar to the destination SIMD&FP register. All the values in this instruction are signed integer values.

+

A64 Instruction

SMINV Hd,Vn.4H
+

Argument Preparation

a → Vn.4H 

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+integer maxmin;
+integer element;
+
+maxmin = Int(Elem[operand, 0, esize], unsigned);
+for e = 1 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned);
+    maxmin = if min then Min(maxmin, element) else Max(maxmin, element);
+
+V[d] = maxmin<esize-1:0>;
+

Supported architectures

A64

Description

Signed Minimum across Vector. This instruction compares all the vector elements in the source SIMD&FP register, and writes the smallest of the values as a scalar to the destination SIMD&FP register. All the values in this instruction are signed integer values.

+

A64 Instruction

SMINV Hd,Vn.8H
+

Argument Preparation

a → Vn.8H 

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+integer maxmin;
+integer element;
+
+maxmin = Int(Elem[operand, 0, esize], unsigned);
+for e = 1 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned);
+    maxmin = if min then Min(maxmin, element) else Max(maxmin, element);
+
+V[d] = maxmin<esize-1:0>;
+

Supported architectures

A64

Description

Signed Minimum Pairwise. This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the smallest of each pair of signed integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

SMINP Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+a → Vm.2S

Results

Vd.S[0] → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[concat, 2*e, esize], unsigned);
+    element2 = Int(Elem[concat, (2*e)+1, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Signed Minimum across Vector. This instruction compares all the vector elements in the source SIMD&FP register, and writes the smallest of the values as a scalar to the destination SIMD&FP register. All the values in this instruction are signed integer values.

+

A64 Instruction

SMINV Sd,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+integer maxmin;
+integer element;
+
+maxmin = Int(Elem[operand, 0, esize], unsigned);
+for e = 1 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned);
+    maxmin = if min then Min(maxmin, element) else Max(maxmin, element);
+
+V[d] = maxmin<esize-1:0>;
+

Supported architectures

A64

Description

Unsigned Minimum across Vector. This instruction compares all the vector elements in the source SIMD&FP register, and writes the smallest of the values as a scalar to the destination SIMD&FP register. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UMINV Bd,Vn.8B
+

Argument Preparation

a → Vn.8B 

Results

Bd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+integer maxmin;
+integer element;
+
+maxmin = Int(Elem[operand, 0, esize], unsigned);
+for e = 1 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned);
+    maxmin = if min then Min(maxmin, element) else Max(maxmin, element);
+
+V[d] = maxmin<esize-1:0>;
+

Supported architectures

A64

Description

Unsigned Minimum across Vector. This instruction compares all the vector elements in the source SIMD&FP register, and writes the smallest of the values as a scalar to the destination SIMD&FP register. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UMINV Bd,Vn.16B
+

Argument Preparation

a → Vn.16B 

Results

Bd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+integer maxmin;
+integer element;
+
+maxmin = Int(Elem[operand, 0, esize], unsigned);
+for e = 1 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned);
+    maxmin = if min then Min(maxmin, element) else Max(maxmin, element);
+
+V[d] = maxmin<esize-1:0>;
+

Supported architectures

A64

Description

Unsigned Minimum across Vector. This instruction compares all the vector elements in the source SIMD&FP register, and writes the smallest of the values as a scalar to the destination SIMD&FP register. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UMINV Hd,Vn.4H
+

Argument Preparation

a → Vn.4H 

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+integer maxmin;
+integer element;
+
+maxmin = Int(Elem[operand, 0, esize], unsigned);
+for e = 1 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned);
+    maxmin = if min then Min(maxmin, element) else Max(maxmin, element);
+
+V[d] = maxmin<esize-1:0>;
+

Supported architectures

A64

Description

Unsigned Minimum across Vector. This instruction compares all the vector elements in the source SIMD&FP register, and writes the smallest of the values as a scalar to the destination SIMD&FP register. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UMINV Hd,Vn.8H
+

Argument Preparation

a → Vn.8H 

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+integer maxmin;
+integer element;
+
+maxmin = Int(Elem[operand, 0, esize], unsigned);
+for e = 1 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned);
+    maxmin = if min then Min(maxmin, element) else Max(maxmin, element);
+
+V[d] = maxmin<esize-1:0>;
+

Supported architectures

A64

Description

Unsigned Minimum Pairwise. This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the smallest of each pair of unsigned integer values into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UMINP Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+a → Vm.2S

Results

Vd.S[0] → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+integer element1;
+integer element2;
+integer maxmin;
+
+for e = 0 to elements-1
+    element1 = Int(Elem[concat, 2*e, esize], unsigned);
+    element2 = Int(Elem[concat, (2*e)+1, esize], unsigned);
+    maxmin = if minimum then Min(element1, element2) else Max(element1, element2);
+    Elem[result, e, esize] = maxmin<esize-1:0>;
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Minimum across Vector. This instruction compares all the vector elements in the source SIMD&FP register, and writes the smallest of the values as a scalar to the destination SIMD&FP register. All the values in this instruction are unsigned integer values.

+

A64 Instruction

UMINV Sd,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+integer maxmin;
+integer element;
+
+maxmin = Int(Elem[operand, 0, esize], unsigned);
+for e = 1 to elements-1
+    element = Int(Elem[operand, e, esize], unsigned);
+    maxmin = if min then Min(maxmin, element) else Max(maxmin, element);
+
+V[d] = maxmin<esize-1:0>;
+

Supported architectures

A64

Description

Floating-point Minimum Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, writes the smaller of each pair of values into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.

+

A64 Instruction

FMINP Sd,Vn.2S
+

Argument Preparation

a → Vn.2S 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+
+    if minimum then
+        Elem[result, e, esize] = FPMin(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMax(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Minimum across Vector. This instruction compares all the vector elements in the source SIMD&FP register, and writes the smallest of the values as a scalar to the destination SIMD&FP register. All the values in this instruction are floating-point values.

+

A64 Instruction

FMINV Sd,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+V[d] = Reduce(ReduceOp_FMIN, operand, esize);
+

Supported architectures

A64

Description

Floating-point Minimum Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, writes the smaller of each pair of values into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.

+

A64 Instruction

FMINP Dd,Vn.2D
+

Argument Preparation

a → Vn.2D 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+
+    if minimum then
+        Elem[result, e, esize] = FPMin(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMax(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Maximum Number Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the largest of each pair of values into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.

+

A64 Instruction

FMAXNMP Sd,Vn.2S
+

Argument Preparation

a → Vn.2S 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+
+    if minimum then
+        Elem[result, e, esize] = FPMinNum(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMaxNum(element1, element2, FPCR);
+
+V[d] = result;
+
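
The NM ("number") variants use FPMaxNum, which prefers the numeric operand when exactly one input is a quiet NaN. Rust's f32::max follows a similar ignore-NaN rule, so a scalar sketch is straightforward (helper name is made up; signaling-NaN corner cases are not modelled):

fn fmaxnmp_f32(a: [f32; 2]) -> f32 {
    // f32::max returns the non-NaN operand when the other is NaN, which is
    // close to FPMaxNum and unlike the plain FPMax used by FMAXP.
    a[0].max(a[1])
}

fn main() {
    assert_eq!(fmaxnmp_f32([f32::NAN, 2.5]), 2.5);
    assert_eq!(fmaxnmp_f32([1.0, -3.0]), 1.0);
}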

Supported architectures

A64

Description

Floating-point Maximum Number across Vector. This instruction compares all the vector elements in the source SIMD&FP register, and writes the largest of the values as a scalar to the destination SIMD&FP register. All the values in this instruction are floating-point values.

+

A64 Instruction

FMAXNMV Sd,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+V[d] = Reduce(ReduceOp_FMAXNUM, operand, esize);
+

Supported architectures

A64

Description

Floating-point Maximum Number Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the largest of each pair of values into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.

+

A64 Instruction

FMAXNMP Dd,Vn.2D
+

Argument Preparation

a → Vn.2D 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+
+    if minimum then
+        Elem[result, e, esize] = FPMinNum(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMaxNum(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Minimum Number Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the smallest of each pair of floating-point values into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.

+

A64 Instruction

FMINNMP Sd,Vn.2S
+

Argument Preparation

a → Vn.2S 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+
+    if minimum then
+        Elem[result, e, esize] = FPMinNum(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMaxNum(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Minimum Number across Vector. This instruction compares all the vector elements in the source SIMD&FP register, and writes the smallest of the values as a scalar to the destination SIMD&FP register. All the values in this instruction are floating-point values.

+

A64 Instruction

FMINNMV Sd,Vn.4S
+

Argument Preparation

a → Vn.4S 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+V[d] = Reduce(ReduceOp_FMINNUM, operand, esize);
+

Supported architectures

A64

Description

Floating-point Minimum Number Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the smallest of each pair of floating-point values into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.

+

A64 Instruction

FMINNMP Dd,Vn.2D
+

Argument Preparation

a → Vn.2D 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+bits(2*datasize) concat = operand2:operand1;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    if pair then
+        element1 = Elem[concat, 2*e, esize];
+        element2 = Elem[concat, (2*e)+1, esize];
+    else
+        element1 = Elem[operand1, e, esize];
+        element2 = Elem[operand2, e, esize];
+
+    if minimum then
+        Elem[result, e, esize] = FPMinNum(element1, element2, FPCR);
+    else
+        Elem[result, e, esize] = FPMaxNum(element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Extract vector from pair of vectors. This instruction extracts the lowest vector elements from the second source SIMD&FP register and the highest vector elements from the first source SIMD&FP register, concatenates the results into a vector, and writes the vector to the destination SIMD&FP register vector. The index value specifies the lowest vector element to extract from the first source register, and consecutive elements are extracted from the first, then second, source registers until the destination vector is filled.

+

A64 Instruction

EXT Vd.8B,Vn.8B,Vm.8B,#n
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B
+0 << n << 7

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) hi = V[m];
+bits(datasize) lo = V[n];
+bits(datasize*2) concat = hi:lo;
+
+V[d] = concat<position+datasize-1:position>;
+
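
A byte-level sketch of EXT for the Vd.8B form: the result is the 8-byte window starting at byte position n of the 16-byte concatenation, with Vn in the low half (illustrative helper, no intrinsics):

fn ext_u8x8(a: [u8; 8], b: [u8; 8], n: usize) -> [u8; 8] {
    assert!(n < 8);
    // concat = hi:lo with lo = Vn (a) in the low bytes and hi = Vm (b) above it.
    let mut concat = [0u8; 16];
    concat[..8].copy_from_slice(&a);
    concat[8..].copy_from_slice(&b);
    let mut out = [0u8; 8];
    out.copy_from_slice(&concat[n..n + 8]);
    out
}

fn main() {
    let a = [0, 1, 2, 3, 4, 5, 6, 7];
    let b = [8, 9, 10, 11, 12, 13, 14, 15];
    // n = 3: the five highest bytes of a are followed by the three lowest of b.
    assert_eq!(ext_u8x8(a, b, 3), [3, 4, 5, 6, 7, 8, 9, 10]);
}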

Supported architectures

v7/A32/A64

Description

Extract vector from pair of vectors. This instruction extracts the lowest vector elements from the second source SIMD&FP register and the highest vector elements from the first source SIMD&FP register, concatenates the results into a vector, and writes the vector to the destination SIMD&FP register vector. The index value specifies the lowest vector element to extract from the first source register, and consecutive elements are extracted from the first, then second, source registers until the destination vector is filled.

+

A64 Instruction

EXT Vd.16B,Vn.16B,Vm.16B,#n
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B
+0 << n << 15

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) hi = V[m];
+bits(datasize) lo = V[n];
+bits(datasize*2) concat = hi:lo;
+
+V[d] = concat<position+datasize-1:position>;
+

Supported architectures

v7/A32/A64

Description

Extract vector from pair of vectors. This instruction extracts the lowest vector elements from the second source SIMD&FP register and the highest vector elements from the first source SIMD&FP register, concatenates the results into a vector, and writes the vector to the destination SIMD&FP register vector. The index value specifies the lowest vector element to extract from the first source register, and consecutive elements are extracted from the first, then second, source registers until the destination vector is filled.

+

A64 Instruction

EXT Vd.8B,Vn.8B,Vm.8B,#(n<<1)
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B
+0 << n << 3

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) hi = V[m];
+bits(datasize) lo = V[n];
+bits(datasize*2) concat = hi:lo;
+
+V[d] = concat<position+datasize-1:position>;
+

Supported architectures

v7/A32/A64

Description

Extract vector from pair of vectors. This instruction extracts the lowest vector elements from the second source SIMD&FP register and the highest vector elements from the first source SIMD&FP register, concatenates the results into a vector, and writes the vector to the destination SIMD&FP register vector. The index value specifies the lowest vector element to extract from the first source register, and consecutive elements are extracted from the first, then second, source registers until the destination vector is filled.

+

A64 Instruction

EXT Vd.16B,Vn.16B,Vm.16B,#(n<<1)
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B
+0 << n << 7

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) hi = V[m];
+bits(datasize) lo = V[n];
+bits(datasize*2) concat = hi:lo;
+
+V[d] = concat<position+datasize-1:position>;
+

Supported architectures

v7/A32/A64

Description

Extract vector from pair of vectors. This instruction extracts the lowest vector elements from the second source SIMD&FP register and the highest vector elements from the first source SIMD&FP register, concatenates the results into a vector, and writes the vector to the destination SIMD&FP register vector. The index value specifies the lowest vector element to extract from the first source register, and consecutive elements are extracted from the first, then second, source registers until the destination vector is filled.

+

A64 Instruction

EXT Vd.8B,Vn.8B,Vm.8B,#(n<<2)
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B
+0 << n << 1

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) hi = V[m];
+bits(datasize) lo = V[n];
+bits(datasize*2) concat = hi:lo;
+
+V[d] = concat<position+datasize-1:position>;
+

Supported architectures

v7/A32/A64

Description

Extract vector from pair of vectors. This instruction extracts the lowest vector elements from the second source SIMD&FP register and the highest vector elements from the first source SIMD&FP register, concatenates the results into a vector, and writes the vector to the destination SIMD&FP register vector. The index value specifies the lowest vector element to extract from the first source register, and consecutive elements are extracted from the first, then second, source registers until the destination vector is filled.

+

A64 Instruction

EXT Vd.16B,Vn.16B,Vm.16B,#(n<<2)
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B
+0 << n << 3

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) hi = V[m];
+bits(datasize) lo = V[n];
+bits(datasize*2) concat = hi:lo;
+
+V[d] = concat<position+datasize-1:position>;
+

Supported architectures

v7/A32/A64

Description

Extract vector from pair of vectors. This instruction extracts the lowest vector elements from the second source SIMD&FP register and the highest vector elements from the first source SIMD&FP register, concatenates the results into a vector, and writes the vector to the destination SIMD&FP register vector. The index value specifies the lowest vector element to extract from the first source register, and consecutive elements are extracted from the first, then second, source registers until the destination vector is filled.

+

A64 Instruction

EXT Vd.8B,Vn.8B,Vm.8B,#(n<<3)
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B
+0 << n << 0

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) hi = V[m];
+bits(datasize) lo = V[n];
+bits(datasize*2) concat = hi:lo;
+
+V[d] = concat<position+datasize-1:position>;
+

Supported architectures

v7/A32/A64

Description

Extract vector from pair of vectors. This instruction extracts the lowest vector elements from the second source SIMD&FP register and the highest vector elements from the first source SIMD&FP register, concatenates the results into a vector, and writes the vector to the destination SIMD&FP register vector. The index value specifies the lowest vector element to extract from the first source register, and consecutive elements are extracted from the first, then second, source registers until the destination vector is filled.

+

A64 Instruction

EXT Vd.16B,Vn.16B,Vm.16B,#(n<<3)
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B
+0 << n << 1

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) hi = V[m];
+bits(datasize) lo = V[n];
+bits(datasize*2) concat = hi:lo;
+
+V[d] = concat<position+datasize-1:position>;
+

Supported architectures

v7/A32/A64

Description

Extract vector from pair of vectors. This instruction extracts the lowest vector elements from the second source SIMD&FP register and the highest vector elements from the first source SIMD&FP register, concatenates the results into a vector, and writes the vector to the destination SIMD&FP register vector. The index value specifies the lowest vector element to extract from the first source register, and consecutive elements are extracted from the first, then second, source registers until the destination vector is filled.

+

A64 Instruction

EXT Vd.8B,Vn.8B,Vm.8B,#n
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B
+0 << n << 7

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) hi = V[m];
+bits(datasize) lo = V[n];
+bits(datasize*2) concat = hi:lo;
+
+V[d] = concat<position+datasize-1:position>;
+

Supported architectures

v7/A32/A64

Description

Extract vector from pair of vectors. This instruction extracts the lowest vector elements from the second source SIMD&FP register and the highest vector elements from the first source SIMD&FP register, concatenates the results into a vector, and writes the vector to the destination SIMD&FP register vector. The index value specifies the lowest vector element to extract from the first source register, and consecutive elements are extracted from the first, then second, source registers until the destination vector is filled.

+

A64 Instruction

EXT Vd.16B,Vn.16B,Vm.16B,#n
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B
+0 << n << 15

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) hi = V[m];
+bits(datasize) lo = V[n];
+bits(datasize*2) concat = hi:lo;
+
+V[d] = concat<position+datasize-1:position>;
+

Supported architectures

v7/A32/A64

Description

Extract vector from pair of vectors. This instruction extracts the lowest vector elements from the second source SIMD&FP register and the highest vector elements from the first source SIMD&FP register, concatenates the results into a vector, and writes the vector to the destination SIMD&FP register vector. The index value specifies the lowest vector element to extract from the first source register, and consecutive elements are extracted from the first, then second, source registers until the destination vector is filled.

+

A64 Instruction

EXT Vd.8B,Vn.8B,Vm.8B,#(n<<1)
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B
+0 << n << 3

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) hi = V[m];
+bits(datasize) lo = V[n];
+bits(datasize*2) concat = hi:lo;
+
+V[d] = concat<position+datasize-1:position>;
+

Supported architectures

v7/A32/A64

Description

Extract vector from pair of vectors. This instruction extracts the lowest vector elements from the second source SIMD&FP register and the highest vector elements from the first source SIMD&FP register, concatenates the results into a vector, and writes the vector to the destination SIMD&FP register vector. The index value specifies the lowest vector element to extract from the first source register, and consecutive elements are extracted from the first, then second, source registers until the destination vector is filled.

+

A64 Instruction

EXT Vd.16B,Vn.16B,Vm.16B,#(n<<1)
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B
+0 << n << 7

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) hi = V[m];
+bits(datasize) lo = V[n];
+bits(datasize*2) concat = hi:lo;
+
+V[d] = concat<position+datasize-1:position>;
+

Supported architectures

v7/A32/A64

Description

Extract vector from pair of vectors. This instruction extracts the lowest vector elements from the second source SIMD&FP register and the highest vector elements from the first source SIMD&FP register, concatenates the results into a vector, and writes the vector to the destination SIMD&FP register vector. The index value specifies the lowest vector element to extract from the first source register, and consecutive elements are extracted from the first, then second, source registers until the destination vector is filled.

+

A64 Instruction

EXT Vd.8B,Vn.8B,Vm.8B,#(n<<2)
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B
+0 << n << 1

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) hi = V[m];
+bits(datasize) lo = V[n];
+bits(datasize*2) concat = hi:lo;
+
+V[d] = concat<position+datasize-1:position>;
+

Supported architectures

v7/A32/A64

Description

Extract vector from pair of vectors. This instruction extracts the lowest vector elements from the second source SIMD&FP register and the highest vector elements from the first source SIMD&FP register, concatenates the results into a vector, and writes the vector to the destination SIMD&FP register vector. The index value specifies the lowest vector element to extract from the first source register, and consecutive elements are extracted from the first, then second, source registers until the destination vector is filled.

+

A64 Instruction

EXT Vd.16B,Vn.16B,Vm.16B,#(n<<2)
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B
+0 << n << 3

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) hi = V[m];
+bits(datasize) lo = V[n];
+bits(datasize*2) concat = hi:lo;
+
+V[d] = concat<position+datasize-1:position>;
+

Supported architectures

v7/A32/A64

Description

Extract vector from pair of vectors. This instruction extracts the lowest vector elements from the second source SIMD&FP register and the highest vector elements from the first source SIMD&FP register, concatenates the results into a vector, and writes the vector to the destination SIMD&FP register vector. The index value specifies the lowest vector element to extract from the first source register, and consecutive elements are extracted from the first, then second, source registers until the destination vector is filled.

+

A64 Instruction

EXT Vd.8B,Vn.8B,Vm.8B,#(n<<3)
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B
+0 << n << 0

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) hi = V[m];
+bits(datasize) lo = V[n];
+bits(datasize*2) concat = hi:lo;
+
+V[d] = concat<position+datasize-1:position>;
+

Supported architectures

v7/A32/A64

Description

Extract vector from pair of vectors. This instruction extracts the lowest vector elements from the second source SIMD&FP register and the highest vector elements from the first source SIMD&FP register, concatenates the results into a vector, and writes the vector to the destination SIMD&FP register vector. The index value specifies the lowest vector element to extract from the first source register, and consecutive elements are extracted from the first, then second, source registers until the destination vector is filled.

+

A64 Instruction

EXT Vd.16B,Vn.16B,Vm.16B,#(n<<3)
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B
+0 << n << 1

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) hi = V[m];
+bits(datasize) lo = V[n];
+bits(datasize*2) concat = hi:lo;
+
+V[d] = concat<position+datasize-1:position>;
+

Supported architectures

v7/A32/A64

Description

Extract vector from pair of vectors. This instruction extracts the lowest vector elements from the second source SIMD&FP register and the highest vector elements from the first source SIMD&FP register, concatenates the results into a vector, and writes the vector to the destination SIMD&FP register vector. The index value specifies the lowest vector element to extract from the first source register, and consecutive elements are extracted from the first, then second, source registers until the destination vector is filled.

+

A64 Instruction

EXT Vd.8B,Vn.8B,Vm.8B,#(n<<3)
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B
+0 << n << 0

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) hi = V[m];
+bits(datasize) lo = V[n];
+bits(datasize*2) concat = hi:lo;
+
+V[d] = concat<position+datasize-1:position>;
+

Supported architectures

A32/A64

Description

Extract vector from pair of vectors. This instruction extracts the lowest vector elements from the second source SIMD&FP register and the highest vector elements from the first source SIMD&FP register, concatenates the results into a vector, and writes the vector to the destination SIMD&FP register vector. The index value specifies the lowest vector element to extract from the first source register, and consecutive elements are extracted from the first, then second, source registers until the destination vector is filled.

+

A64 Instruction

EXT Vd.16B,Vn.16B,Vm.16B,#(n<<3)
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B
+0 << n << 1

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) hi = V[m];
+bits(datasize) lo = V[n];
+bits(datasize*2) concat = hi:lo;
+
+V[d] = concat<position+datasize-1:position>;
+

Supported architectures

A32/A64

Description

Extract vector from pair of vectors. This instruction extracts the lowest vector elements from the second source SIMD&FP register and the highest vector elements from the first source SIMD&FP register, concatenates the results into a vector, and writes the vector to the destination SIMD&FP register vector. The index value specifies the lowest vector element to extract from the first source register, and consecutive elements are extracted from the first, then second, source registers until the destination vector is filled.

+

A64 Instruction

EXT Vd.8B,Vn.8B,Vm.8B,#(n<<2)
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B
+0 << n << 1

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) hi = V[m];
+bits(datasize) lo = V[n];
+bits(datasize*2) concat = hi:lo;
+
+V[d] = concat<position+datasize-1:position>;
+

Supported architectures

v7/A32/A64

Description

Extract vector from pair of vectors. This instruction extracts the lowest vector elements from the second source SIMD&FP register and the highest vector elements from the first source SIMD&FP register, concatenates the results into a vector, and writes the vector to the destination SIMD&FP register vector. The index value specifies the lowest vector element to extract from the first source register, and consecutive elements are extracted from the first, then second, source registers until the destination vector is filled.

+

A64 Instruction

EXT Vd.16B,Vn.16B,Vm.16B,#(n<<2)
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B
+0 << n << 3

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) hi = V[m];
+bits(datasize) lo = V[n];
+bits(datasize*2) concat = hi:lo;
+
+V[d] = concat<position+datasize-1:position>;
+

Supported architectures

v7/A32/A64

Description

Extract vector from pair of vectors. This instruction extracts the lowest vector elements from the second source SIMD&FP register and the highest vector elements from the first source SIMD&FP register, concatenates the results into a vector, and writes the vector to the destination SIMD&FP register vector. The index value specifies the lowest vector element to extract from the first source register, and consecutive elements are extracted from the first, then second, source registers until the destination vector is filled.

+

A64 Instruction

EXT Vd.8B,Vn.8B,Vm.8B,#(n<<3)
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B
+0 << n << 0

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) hi = V[m];
+bits(datasize) lo = V[n];
+bits(datasize*2) concat = hi:lo;
+
+V[d] = concat<position+datasize-1:position>;
+

Supported architectures

A64

Description

Extract vector from pair of vectors. This instruction extracts the lowest vector elements from the second source SIMD&FP register and the highest vector elements from the first source SIMD&FP register, concatenates the results into a vector, and writes the vector to the destination SIMD&FP register vector. The index value specifies the lowest vector element to extract from the first source register, and consecutive elements are extracted from the first, then second, source registers until the destination vector is filled.

+

A64 Instruction

EXT Vd.16B,Vn.16B,Vm.16B,#(n<<3)
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B
+0 << n << 1

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) hi = V[m];
+bits(datasize) lo = V[n];
+bits(datasize*2) concat = hi:lo;
+
+V[d] = concat<position+datasize-1:position>;
+

Supported architectures

A64

Description

Extract vector from pair of vectors. This instruction extracts the lowest vector elements from the second source SIMD&FP register and the highest vector elements from the first source SIMD&FP register, concatenates the results into a vector, and writes the vector to the destination SIMD&FP register vector. The index value specifies the lowest vector element to extract from the first source register, and consecutive elements are extracted from the first, then second, source registers until the destination vector is filled.

+

A64 Instruction

EXT Vd.8B,Vn.8B,Vm.8B,#n
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B
+0 << n << 7

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) hi = V[m];
+bits(datasize) lo = V[n];
+bits(datasize*2) concat = hi:lo;
+
+V[d] = concat<position+datasize-1:position>;
+

Supported architectures

v7/A32/A64

Description

Extract vector from pair of vectors. This instruction extracts the lowest vector elements from the second source SIMD&FP register and the highest vector elements from the first source SIMD&FP register, concatenates the results into a vector, and writes the vector to the destination SIMD&FP register vector. The index value specifies the lowest vector element to extract from the first source register, and consecutive elements are extracted from the first, then second, source registers until the destination vector is filled.

+

A64 Instruction

EXT Vd.16B,Vn.16B,Vm.16B,#n
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B
+0 << n << 15

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) hi = V[m];
+bits(datasize) lo = V[n];
+bits(datasize*2) concat = hi:lo;
+
+V[d] = concat<position+datasize-1:position>;
+

Supported architectures

v7/A32/A64

Description

Extract vector from pair of vectors. This instruction extracts the lowest vector elements from the second source SIMD&FP register and the highest vector elements from the first source SIMD&FP register, concatenates the results into a vector, and writes the vector to the destination SIMD&FP register vector. The index value specifies the lowest vector element to extract from the first source register, and consecutive elements are extracted from the first, then second, source registers until the destination vector is filled.

+

A64 Instruction

EXT Vd.8B,Vn.8B,Vm.8B,#(n<<1)
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B
+0 << n << 3

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) hi = V[m];
+bits(datasize) lo = V[n];
+bits(datasize*2) concat = hi:lo;
+
+V[d] = concat<position+datasize-1:position>;
+

Supported architectures

v7/A32/A64

Description

Extract vector from pair of vectors. This instruction extracts the lowest vector elements from the second source SIMD&FP register and the highest vector elements from the first source SIMD&FP register, concatenates the results into a vector, and writes the vector to the destination SIMD&FP register vector. The index value specifies the lowest vector element to extract from the first source register, and consecutive elements are extracted from the first, then second, source registers until the destination vector is filled.

+

A64 Instruction

EXT Vd.16B,Vn.16B,Vm.16B,#(n<<1)
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B
+0 << n << 7

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) hi = V[m];
+bits(datasize) lo = V[n];
+bits(datasize*2) concat = hi:lo;
+
+V[d] = concat<position+datasize-1:position>;
+

Supported architectures

v7/A32/A64

Description

Reverse elements in 64-bit doublewords (vector). This instruction reverses the order of 8-bit, 16-bit, or 32-bit elements in each doubleword of the vector in the source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

REV64 Vd.8B,Vn.8B
+

Argument Preparation

vec → Vn.8B 

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        Elem[result, rev_element, esize] = Elem[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+V[d] = result;
+
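
The 8B form above reverses its single 64-bit doubleword; the 16B form that follows does the same per doubleword. A sketch of the latter, since it shows the container structure (helper name is hypothetical):

fn rev64_u8x16(v: [u8; 16]) -> [u8; 16] {
    // Two 64-bit containers of eight 8-bit elements each: reverse the elements
    // inside each container while the containers keep their positions.
    let mut out = v;
    out[..8].reverse();
    out[8..].reverse();
    out
}

fn main() {
    let v: [u8; 16] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15];
    assert_eq!(
        rev64_u8x16(v),
        [7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8]
    );
}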

Supported architectures

v7/A32/A64

Description

Reverse elements in 64-bit doublewords (vector). This instruction reverses the order of 8-bit, 16-bit, or 32-bit elements in each doubleword of the vector in the source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

REV64 Vd.16B,Vn.16B
+

Argument Preparation

vec → Vn.16B 

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        Elem[result, rev_element, esize] = Elem[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Reverse elements in 64-bit doublewords (vector). This instruction reverses the order of 8-bit, 16-bit, or 32-bit elements in each doubleword of the vector in the source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

REV64 Vd.4H,Vn.4H
+

Argument Preparation

vec → Vn.4H 

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        Elem[result, rev_element, esize] = Elem[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Reverse elements in 64-bit doublewords (vector). This instruction reverses the order of 8-bit, 16-bit, or 32-bit elements in each doubleword of the vector in the source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

REV64 Vd.8H,Vn.8H
+

Argument Preparation

vec → Vn.8H 

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        Elem[result, rev_element, esize] = Elem[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Reverse elements in 64-bit doublewords (vector). This instruction reverses the order of 8-bit, 16-bit, or 32-bit elements in each doubleword of the vector in the source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

REV64 Vd.2S,Vn.2S
+

Argument Preparation

vec → Vn.2S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        Elem[result, rev_element, esize] = Elem[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Reverse elements in 64-bit doublewords (vector). This instruction reverses the order of 8-bit, 16-bit, or 32-bit elements in each doubleword of the vector in the source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

REV64 Vd.4S,Vn.4S
+

Argument Preparation

vec → Vn.4S 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        Elem[result, rev_element, esize] = Elem[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Reverse elements in 64-bit doublewords (vector). This instruction reverses the order of 8-bit, 16-bit, or 32-bit elements in each doubleword of the vector in the source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

REV64 Vd.8B,Vn.8B
+

Argument Preparation

vec → Vn.8B 

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        Elem[result, rev_element, esize] = Elem[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Reverse elements in 64-bit doublewords (vector). This instruction reverses the order of 8-bit, 16-bit, or 32-bit elements in each doubleword of the vector in the source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

REV64 Vd.16B,Vn.16B
+

Argument Preparation

vec → Vn.16B 

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        Elem[result, rev_element, esize] = Elem[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Reverse elements in 64-bit doublewords (vector). This instruction reverses the order of 8-bit, 16-bit, or 32-bit elements in each doubleword of the vector in the source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

REV64 Vd.4H,Vn.4H
+

Argument Preparation

vec → Vn.4H 

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        Elem[result, rev_element, esize] = Elem[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Reverse elements in 64-bit doublewords (vector). This instruction reverses the order of 8-bit, 16-bit, or 32-bit elements in each doubleword of the vector in the source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

REV64 Vd.8H,Vn.8H
+

Argument Preparation

vec → Vn.8H 

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        Elem[result, rev_element, esize] = Elem[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Reverse elements in 64-bit doublewords (vector). This instruction reverses the order of 8-bit, 16-bit, or 32-bit elements in each doubleword of the vector in the source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

REV64 Vd.2S,Vn.2S
+

Argument Preparation

vec → Vn.2S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        Elem[result, rev_element, esize] = Elem[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Reverse elements in 64-bit doublewords (vector). This instruction reverses the order of 8-bit, 16-bit, or 32-bit elements in each doubleword of the vector in the source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

REV64 Vd.4S,Vn.4S
+

Argument Preparation

vec → Vn.4S 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        Elem[result, rev_element, esize] = Elem[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Reverse elements in 64-bit doublewords (vector). This instruction reverses the order of 8-bit, 16-bit, or 32-bit elements in each doubleword of the vector in the source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

REV64 Vd.2S,Vn.2S
+

Argument Preparation

vec → Vn.2S 

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        Elem[result, rev_element, esize] = Elem[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Reverse elements in 64-bit doublewords (vector). This instruction reverses the order of 8-bit, 16-bit, or 32-bit elements in each doubleword of the vector in the source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

REV64 Vd.4S,Vn.4S
+

Argument Preparation

vec → Vn.4S 

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        Elem[result, rev_element, esize] = Elem[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Reverse elements in 64-bit doublewords (vector). This instruction reverses the order of 8-bit, 16-bit, or 32-bit elements in each doubleword of the vector in the source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

REV64 Vd.8B,Vn.8B
+

Argument Preparation

vec → Vn.8B 

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        Elem[result, rev_element, esize] = Elem[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Reverse elements in 64-bit doublewords (vector). This instruction reverses the order of 8-bit, 16-bit, or 32-bit elements in each doubleword of the vector in the source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

REV64 Vd.16B,Vn.16B
+

Argument Preparation

vec → Vn.16B 

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        Elem[result, rev_element, esize] = Elem[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Reverse elements in 64-bit doublewords (vector). This instruction reverses the order of 8-bit, 16-bit, or 32-bit elements in each doubleword of the vector in the source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

REV64 Vd.4H,Vn.4H
+

Argument Preparation

vec → Vn.4H 

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        Elem[result, rev_element, esize] = Elem[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Reverse elements in 64-bit doublewords (vector). This instruction reverses the order of 8-bit, 16-bit, or 32-bit elements in each doubleword of the vector in the source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

REV64 Vd.8H,Vn.8H
+

Argument Preparation

vec → Vn.8H 

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        Elem[result, rev_element, esize] = Elem[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Reverse elements in 32-bit words (vector). This instruction reverses the order of 8-bit or 16-bit elements in each word of the vector in the source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

REV32 Vd.8B,Vn.8B
+

Argument Preparation

vec → Vn.8B 

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        Elem[result, rev_element, esize] = Elem[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
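
REV32 runs the same loop with a 32-bit container, so for 8-bit elements each group of four bytes is reversed in place. A plain-Rust sketch for an 8-byte vector follows; the name rev32_8b is illustrative only.

// Sketch of REV32 on 8 x u8 lanes: reverse byte order inside each 32-bit word.
fn rev32_8b(operand: [u8; 8]) -> [u8; 8] {
    const EPC: usize = 4; // 32-bit container / 8-bit element
    let mut result = [0u8; 8];
    for (c, chunk) in operand.chunks_exact(EPC).enumerate() {
        for (e, &v) in chunk.iter().enumerate() {
            result[c * EPC + (EPC - 1 - e)] = v;
        }
    }
    result
}

fn main() {
    assert_eq!(rev32_8b([0, 1, 2, 3, 4, 5, 6, 7]), [3, 2, 1, 0, 7, 6, 5, 4]);
}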

Description

Reverse elements in 32-bit words (vector). This instruction reverses the order of 8-bit or 16-bit elements in each word of the vector in the source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

REV32 Vd.16B,Vn.16B
+

Argument Preparation

vec → Vn.16B 

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        Elem[result, rev_element, esize] = Elem[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Reverse elements in 32-bit words (vector). This instruction reverses the order of 8-bit or 16-bit elements in each word of the vector in the source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

REV32 Vd.4H,Vn.4H
+

Argument Preparation

vec → Vn.4H 

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        Elem[result, rev_element, esize] = Elem[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Reverse elements in 32-bit words (vector). This instruction reverses the order of 8-bit or 16-bit elements in each word of the vector in the source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

REV32 Vd.8H,Vn.8H
+

Argument Preparation

vec → Vn.8H 

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        Elem[result, rev_element, esize] = Elem[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Reverse elements in 32-bit words (vector). This instruction reverses the order of 8-bit or 16-bit elements in each word of the vector in the source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

REV32 Vd.8B,Vn.8B
+

Argument Preparation

vec → Vn.8B 

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        Elem[result, rev_element, esize] = Elem[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Reverse elements in 32-bit words (vector). This instruction reverses the order of 8-bit or 16-bit elements in each word of the vector in the source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

REV32 Vd.16B,Vn.16B
+

Argument Preparation

vec → Vn.16B 

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        Elem[result, rev_element, esize] = Elem[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Reverse elements in 32-bit words (vector). This instruction reverses the order of 8-bit or 16-bit elements in each word of the vector in the source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

REV32 Vd.4H,Vn.4H
+

Argument Preparation

vec → Vn.4H 

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        Elem[result, rev_element, esize] = Elem[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Reverse elements in 32-bit words (vector). This instruction reverses the order of 8-bit or 16-bit elements in each word of the vector in the source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

REV32 Vd.8H,Vn.8H
+

Argument Preparation

vec → Vn.8H 

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        Elem[result, rev_element, esize] = Elem[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Reverse elements in 32-bit words (vector). This instruction reverses the order of 8-bit or 16-bit elements in each word of the vector in the source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

REV32 Vd.8B,Vn.8B
+

Argument Preparation

vec → Vn.8B 

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        Elem[result, rev_element, esize] = Elem[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Reverse elements in 32-bit words (vector). This instruction reverses the order of 8-bit or 16-bit elements in each word of the vector in the source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

REV32 Vd.16B,Vn.16B
+

Argument Preparation

vec → Vn.16B 

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        Elem[result, rev_element, esize] = Elem[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Reverse elements in 32-bit words (vector). This instruction reverses the order of 8-bit or 16-bit elements in each word of the vector in the source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

REV32 Vd.4H,Vn.4H
+

Argument Preparation

vec → Vn.4H 

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        Elem[result, rev_element, esize] = Elem[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Reverse elements in 32-bit words (vector). This instruction reverses the order of 8-bit or 16-bit elements in each word of the vector in the source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

REV32 Vd.8H,Vn.8H
+

Argument Preparation

vec → Vn.8H 

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        Elem[result, rev_element, esize] = Elem[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Reverse elements in 16-bit halfwords (vector). This instruction reverses the order of 8-bit elements in each halfword of the vector in the source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

REV16 Vd.8B,Vn.8B
+

Argument Preparation

vec → Vn.8B 

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        Elem[result, rev_element, esize] = Elem[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
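
With a 16-bit container the loop degenerates to swapping the two bytes inside each halfword. A plain-Rust sketch for an 8-byte vector follows; the name rev16_8b is made up for illustration.

// Sketch of REV16 on 8 x u8 lanes: swap the two bytes of each 16-bit halfword.
fn rev16_8b(operand: [u8; 8]) -> [u8; 8] {
    let mut result = operand;
    for pair in result.chunks_exact_mut(2) {
        pair.swap(0, 1);
    }
    result
}

fn main() {
    assert_eq!(rev16_8b([0, 1, 2, 3, 4, 5, 6, 7]), [1, 0, 3, 2, 5, 4, 7, 6]);
}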

Description

Reverse elements in 16-bit halfwords (vector). This instruction reverses the order of 8-bit elements in each halfword of the vector in the source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

REV16 Vd.16B,Vn.16B
+

Argument Preparation

vec → Vn.16B 

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        Elem[result, rev_element, esize] = Elem[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Reverse elements in 16-bit halfwords (vector). This instruction reverses the order of 8-bit elements in each halfword of the vector in the source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

REV16 Vd.8B,Vn.8B
+

Argument Preparation

vec → Vn.8B 

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        Elem[result, rev_element, esize] = Elem[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Reverse elements in 16-bit halfwords (vector). This instruction reverses the order of 8-bit elements in each halfword of the vector in the source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

REV16 Vd.16B,Vn.16B
+

Argument Preparation

vec → Vn.16B 

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        Elem[result, rev_element, esize] = Elem[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Reverse elements in 16-bit halfwords (vector). This instruction reverses the order of 8-bit elements in each halfword of the vector in the source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

REV16 Vd.8B,Vn.8B
+

Argument Preparation

vec → Vn.8B 

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        Elem[result, rev_element, esize] = Elem[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Reverse elements in 16-bit halfwords (vector). This instruction reverses the order of 8-bit elements in each halfword of the vector in the source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

REV16 Vd.16B,Vn.16B
+

Argument Preparation

vec → Vn.16B 

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+integer element = 0;
+integer rev_element;
+for c = 0 to containers-1
+    rev_element = element + elements_per_container - 1;
+    for e = 0 to elements_per_container-1
+        Elem[result, rev_element, esize] = Elem[operand, element, esize];
+        element = element + 1;
+        rev_element = rev_element - 1;
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Zip vectors (primary). This instruction reads adjacent vector elements from the lower half of two source SIMD&FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.

+

A64 Instruction

ZIP1 Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, base+p, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, base+p, esize];
+
+V[d] = result;
+

Supported architectures

A64
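
In the pseudocode above, ZIP1 corresponds to part = 0, so base = 0 and the interleaved pairs are drawn from the lower half of each source register. The following plain-Rust sketch models this for 8-bit elements in 8-byte vectors; the helper name zip1_8b is illustrative, not an API.

// Sketch of ZIP1 on 8 x u8 lanes: interleave the lower halves of `a` and `b`.
fn zip1_8b(a: [u8; 8], b: [u8; 8]) -> [u8; 8] {
    let pairs = a.len() / 2;
    let part = 0;            // primary zip reads the lower half
    let base = part * pairs; // = 0
    let mut result = [0u8; 8];
    for p in 0..pairs {
        result[2 * p] = a[base + p];
        result[2 * p + 1] = b[base + p];
    }
    result
}

fn main() {
    let a = [0, 1, 2, 3, 4, 5, 6, 7];
    let b = [10, 11, 12, 13, 14, 15, 16, 17];
    assert_eq!(zip1_8b(a, b), [0, 10, 1, 11, 2, 12, 3, 13]);
}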

Description

Zip vectors (primary). This instruction reads adjacent vector elements from the lower half of two source SIMD&FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.

+

A64 Instruction

ZIP1 Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, base+p, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, base+p, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Zip vectors (primary). This instruction reads adjacent vector elements from the lower half of two source SIMD&FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.

+

A64 Instruction

ZIP1 Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, base+p, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, base+p, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Zip vectors (primary). This instruction reads adjacent vector elements from the lower half of two source SIMD&FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.

+

A64 Instruction

ZIP1 Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, base+p, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, base+p, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Zip vectors (primary). This instruction reads adjacent vector elements from the lower half of two source SIMD&FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.

+

A64 Instruction

ZIP1 Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, base+p, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, base+p, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Zip vectors (primary). This instruction reads adjacent vector elements from the lower half of two source SIMD&FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.

+

A64 Instruction

ZIP1 Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, base+p, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, base+p, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Zip vectors (primary). This instruction reads adjacent vector elements from the lower half of two source SIMD&FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.

+

A64 Instruction

ZIP1 Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, base+p, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, base+p, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Zip vectors (primary). This instruction reads adjacent vector elements from the lower half of two source SIMD&FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.

+

A64 Instruction

ZIP1 Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, base+p, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, base+p, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Zip vectors (primary). This instruction reads adjacent vector elements from the lower half of two source SIMD&FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.

+

A64 Instruction

ZIP1 Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, base+p, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, base+p, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Zip vectors (primary). This instruction reads adjacent vector elements from the lower half of two source SIMD&FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.

+

A64 Instruction

ZIP1 Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, base+p, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, base+p, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Zip vectors (primary). This instruction reads adjacent vector elements from the lower half of two source SIMD&FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.

+

A64 Instruction

ZIP1 Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, base+p, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, base+p, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Zip vectors (primary). This instruction reads adjacent vector elements from the lower half of two source SIMD&FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.

+

A64 Instruction

ZIP1 Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, base+p, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, base+p, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Zip vectors (primary). This instruction reads adjacent vector elements from the lower half of two source SIMD&FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.

+

A64 Instruction

ZIP1 Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, base+p, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, base+p, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Zip vectors (primary). This instruction reads adjacent vector elements from the lower half of two source SIMD&FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.

+

A64 Instruction

ZIP1 Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, base+p, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, base+p, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Zip vectors (primary). This instruction reads adjacent vector elements from the lower half of two source SIMD&FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.

+

A64 Instruction

ZIP1 Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, base+p, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, base+p, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Zip vectors (primary). This instruction reads adjacent vector elements from the lower half of two source SIMD&FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.

+

A64 Instruction

ZIP1 Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, base+p, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, base+p, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Zip vectors (primary). This instruction reads adjacent vector elements from the lower half of two source SIMD&FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.

+

A64 Instruction

ZIP1 Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, base+p, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, base+p, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Zip vectors (primary). This instruction reads adjacent vector elements from the lower half of two source SIMD&FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.

+

A64 Instruction

ZIP1 Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, base+p, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, base+p, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Zip vectors (primary). This instruction reads adjacent vector elements from the lower half of two source SIMD&FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.

+

A64 Instruction

ZIP1 Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, base+p, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, base+p, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Zip vectors (primary). This instruction reads adjacent vector elements from the lower half of two source SIMD&FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.

+

A64 Instruction

ZIP1 Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, base+p, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, base+p, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Zip vectors (primary). This instruction reads adjacent vector elements from the lower half of two source SIMD&FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.

+

A64 Instruction

ZIP1 Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, base+p, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, base+p, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Zip vectors (primary). This instruction reads adjacent vector elements from the lower half of two source SIMD&FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.

+

A64 Instruction

ZIP1 Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, base+p, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, base+p, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Zip vectors (secondary). This instruction reads adjacent vector elements from the upper half of two source SIMD&FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.

+

A64 Instruction

ZIP2 Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, base+p, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, base+p, esize];
+
+V[d] = result;
+

Supported architectures

A64
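
ZIP2 is the same loop with part = 1, so base = pairs and the lanes come from the upper half of each source register. A plain-Rust sketch for 8-bit elements in 8-byte vectors follows; zip2_8b is a made-up name used only for this example.

// Sketch of ZIP2 on 8 x u8 lanes: interleave the upper halves of `a` and `b`.
fn zip2_8b(a: [u8; 8], b: [u8; 8]) -> [u8; 8] {
    let pairs = a.len() / 2;
    let part = 1;            // secondary zip reads the upper half
    let base = part * pairs; // = pairs
    let mut result = [0u8; 8];
    for p in 0..pairs {
        result[2 * p] = a[base + p];
        result[2 * p + 1] = b[base + p];
    }
    result
}

fn main() {
    let a = [0, 1, 2, 3, 4, 5, 6, 7];
    let b = [10, 11, 12, 13, 14, 15, 16, 17];
    assert_eq!(zip2_8b(a, b), [4, 14, 5, 15, 6, 16, 7, 17]);
}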

Description

Zip vectors (secondary). This instruction reads adjacent vector elements from the upper half of two source SIMD&FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.

+

A64 Instruction

ZIP2 Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, base+p, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, base+p, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Zip vectors (secondary). This instruction reads adjacent vector elements from the upper half of two source SIMD&FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.

+

A64 Instruction

ZIP2 Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, base+p, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, base+p, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Zip vectors (secondary). This instruction reads adjacent vector elements from the upper half of two source SIMD&FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.

+

A64 Instruction

ZIP2 Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, base+p, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, base+p, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Zip vectors (secondary). This instruction reads adjacent vector elements from the upper half of two source SIMD&FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.

+

A64 Instruction

ZIP2 Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, base+p, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, base+p, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Zip vectors (secondary). This instruction reads adjacent vector elements from the upper half of two source SIMD&FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.

+

A64 Instruction

ZIP2 Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, base+p, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, base+p, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Zip vectors (secondary). This instruction reads adjacent vector elements from the upper half of two source SIMD&FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.

+

A64 Instruction

ZIP2 Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, base+p, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, base+p, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Zip vectors (secondary). This instruction reads adjacent vector elements from the upper half of two source SIMD&FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.

+

A64 Instruction

ZIP2 Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, base+p, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, base+p, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Zip vectors (secondary). This instruction reads adjacent vector elements from the upper half of two source SIMD&FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.

+

A64 Instruction

ZIP2 Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, base+p, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, base+p, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Zip vectors (secondary). This instruction reads adjacent vector elements from the upper half of two source SIMD&FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.

+

A64 Instruction

ZIP2 Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, base+p, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, base+p, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Zip vectors (secondary). This instruction reads adjacent vector elements from the upper half of two source SIMD&FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&FP register. The first pair from the upper half of the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from the upper half of each source register.

+

A64 Instruction

ZIP2 Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, base+p, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, base+p, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Zip vectors (secondary). This instruction reads adjacent vector elements from the upper half of two source SIMD&FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&FP register. The first pair from the upper half of the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from the upper half of each source register.

+

A64 Instruction

ZIP2 Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, base+p, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, base+p, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Zip vectors (secondary). This instruction reads adjacent vector elements from the upper half of two source SIMD&FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&FP register. The first pair from the upper half of the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from the upper half of each source register.

+

A64 Instruction

ZIP2 Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, base+p, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, base+p, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Zip vectors (secondary). This instruction reads adjacent vector elements from the upper half of two source SIMD&FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&FP register. The first pair from the upper half of the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from the upper half of each source register.

+

A64 Instruction

ZIP2 Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, base+p, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, base+p, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Zip vectors (secondary). This instruction reads adjacent vector elements from the upper half of two source SIMD&FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&FP register. The first pair from the upper half of the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from the upper half of each source register.

+

A64 Instruction

ZIP2 Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, base+p, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, base+p, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Zip vectors (secondary). This instruction reads adjacent vector elements from the upper half of two source SIMD&FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&FP register. The first pair from the upper half of the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from the upper half of each source register.

+

A64 Instruction

ZIP2 Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, base+p, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, base+p, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Zip vectors (secondary). This instruction reads adjacent vector elements from the upper half of two source SIMD&FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&FP register. The first pair from the upper half of the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from the upper half of each source register.

+

A64 Instruction

ZIP2 Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, base+p, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, base+p, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Zip vectors (secondary). This instruction reads adjacent vector elements from the upper half of two source SIMD&FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&FP register. The first pair from the upper half of the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from the upper half of each source register.

+

A64 Instruction

ZIP2 Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, base+p, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, base+p, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Zip vectors (secondary). This instruction reads adjacent vector elements from the upper half of two source SIMD&FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&FP register. The first pair from the upper half of the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from the upper half of each source register.

+

A64 Instruction

ZIP2 Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, base+p, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, base+p, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Zip vectors (secondary). This instruction reads adjacent vector elements from the upper half of two source SIMD&FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&FP register. The first pair from the upper half of the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from the upper half of each source register.

+

A64 Instruction

ZIP2 Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, base+p, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, base+p, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Zip vectors (secondary). This instruction reads adjacent vector elements from the upper half of two source SIMD&FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&FP register. The first pair from the upper half of the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from the upper half of each source register.

+

A64 Instruction

ZIP2 Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, base+p, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, base+p, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Zip vectors (secondary). This instruction reads adjacent vector elements from the upper half of two source SIMD&FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&FP register. The first pair from the upper half of the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from the upper half of each source register.

+

A64 Instruction

ZIP2 Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+integer base = part * pairs;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, base+p, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, base+p, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Unzip vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&FP registers, starting at zero, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UZP1 Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operandl = V[n];
+bits(datasize) operandh = V[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    Elem[result, e, esize] = Elem[zipped, 2*e+part, esize];
+
+V[d] = result;
+
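Again only datasize and esize vary between the UZP1 arrangements below. A hypothetical scalar sketch of the Operation pseudocode in plain Rust (the uzp1_model name and slice signature are invented for illustration, not stdarch code; part is 0 for UZP1):

// Scalar model of the UZP1 Operation pseudocode above (illustration only).
// The two sources are conceptually concatenated (`zipped = operandh:operandl`)
// and, with `part == 0` for UZP1, the even-numbered elements are kept.
fn uzp1_model<T: Copy>(a: &[T], b: &[T]) -> Vec<T> {
    assert_eq!(a.len(), b.len());
    let part = 0;                 // 0 for UZP1, 1 for UZP2
    let zipped: Vec<T> = a.iter().chain(b.iter()).copied().collect();
    (0..a.len()).map(|e| zipped[2 * e + part]).collect()
}

fn main() {
    let a = [0u8, 1, 2, 3, 4, 5, 6, 7];
    let b = [10u8, 11, 12, 13, 14, 15, 16, 17];
    // Even-numbered elements of a fill the lower half, those of b the upper half.
    assert_eq!(uzp1_model(&a, &b), [0, 2, 4, 6, 10, 12, 14, 16]);
}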

Supported architectures

A64

Description

Unzip vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&FP registers, starting at zero, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UZP1 Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operandl = V[n];
+bits(datasize) operandh = V[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    Elem[result, e, esize] = Elem[zipped, 2*e+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Unzip vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&FP registers, starting at zero, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UZP1 Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operandl = V[n];
+bits(datasize) operandh = V[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    Elem[result, e, esize] = Elem[zipped, 2*e+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Unzip vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&FP registers, starting at zero, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UZP1 Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operandl = V[n];
+bits(datasize) operandh = V[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    Elem[result, e, esize] = Elem[zipped, 2*e+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Unzip vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&FP registers, starting at zero, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UZP1 Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operandl = V[n];
+bits(datasize) operandh = V[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    Elem[result, e, esize] = Elem[zipped, 2*e+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Unzip vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&FP registers, starting at zero, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UZP1 Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operandl = V[n];
+bits(datasize) operandh = V[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    Elem[result, e, esize] = Elem[zipped, 2*e+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Unzip vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&FP registers, starting at zero, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UZP1 Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operandl = V[n];
+bits(datasize) operandh = V[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    Elem[result, e, esize] = Elem[zipped, 2*e+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Unzip vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&FP registers, starting at zero, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UZP1 Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operandl = V[n];
+bits(datasize) operandh = V[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    Elem[result, e, esize] = Elem[zipped, 2*e+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Unzip vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&FP registers, starting at zero, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UZP1 Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operandl = V[n];
+bits(datasize) operandh = V[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    Elem[result, e, esize] = Elem[zipped, 2*e+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Unzip vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&FP registers, starting at zero, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UZP1 Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operandl = V[n];
+bits(datasize) operandh = V[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    Elem[result, e, esize] = Elem[zipped, 2*e+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Unzip vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&FP registers, starting at zero, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UZP1 Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operandl = V[n];
+bits(datasize) operandh = V[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    Elem[result, e, esize] = Elem[zipped, 2*e+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Unzip vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&FP registers, starting at zero, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UZP1 Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operandl = V[n];
+bits(datasize) operandh = V[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    Elem[result, e, esize] = Elem[zipped, 2*e+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Unzip vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&FP registers, starting at zero, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UZP1 Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operandl = V[n];
+bits(datasize) operandh = V[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    Elem[result, e, esize] = Elem[zipped, 2*e+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Unzip vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&FP registers, starting at zero, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UZP1 Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operandl = V[n];
+bits(datasize) operandh = V[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    Elem[result, e, esize] = Elem[zipped, 2*e+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Unzip vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&FP registers, starting at zero, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UZP1 Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operandl = V[n];
+bits(datasize) operandh = V[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    Elem[result, e, esize] = Elem[zipped, 2*e+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Unzip vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&FP registers, starting at zero, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UZP1 Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operandl = V[n];
+bits(datasize) operandh = V[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    Elem[result, e, esize] = Elem[zipped, 2*e+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Unzip vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&FP registers, starting at zero, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UZP1 Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operandl = V[n];
+bits(datasize) operandh = V[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    Elem[result, e, esize] = Elem[zipped, 2*e+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Unzip vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&FP registers, starting at zero, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UZP1 Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operandl = V[n];
+bits(datasize) operandh = V[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    Elem[result, e, esize] = Elem[zipped, 2*e+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Unzip vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&FP registers, starting at zero, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UZP1 Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operandl = V[n];
+bits(datasize) operandh = V[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    Elem[result, e, esize] = Elem[zipped, 2*e+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Unzip vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&FP registers, starting at zero, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UZP1 Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operandl = V[n];
+bits(datasize) operandh = V[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    Elem[result, e, esize] = Elem[zipped, 2*e+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Unzip vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&FP registers, starting at zero, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UZP1 Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operandl = V[n];
+bits(datasize) operandh = V[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    Elem[result, e, esize] = Elem[zipped, 2*e+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Unzip vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&FP registers, starting at zero, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UZP1 Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operandl = V[n];
+bits(datasize) operandh = V[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    Elem[result, e, esize] = Elem[zipped, 2*e+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Unzip vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UZP2 Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operandl = V[n];
+bits(datasize) operandh = V[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    Elem[result, e, esize] = Elem[zipped, 2*e+part, esize];
+
+V[d] = result;
+
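The UZP2 pseudocode differs from UZP1 only in that part is 1, so the odd-numbered elements of the concatenation are kept. A correspondingly small scalar sketch (again an invented uzp2_model helper for illustration, not stdarch code):

// Same model as sketched for UZP1, but with `part == 1` (odd-numbered elements).
fn uzp2_model<T: Copy>(a: &[T], b: &[T]) -> Vec<T> {
    assert_eq!(a.len(), b.len());
    let zipped: Vec<T> = a.iter().chain(b.iter()).copied().collect();
    (0..a.len()).map(|e| zipped[2 * e + 1]).collect()
}

fn main() {
    let a = [0u8, 1, 2, 3, 4, 5, 6, 7];
    let b = [10u8, 11, 12, 13, 14, 15, 16, 17];
    assert_eq!(uzp2_model(&a, &b), [1, 3, 5, 7, 11, 13, 15, 17]);
}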

Supported architectures

A64

Description

Unzip vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UZP2 Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operandl = V[n];
+bits(datasize) operandh = V[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    Elem[result, e, esize] = Elem[zipped, 2*e+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Unzip vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UZP2 Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operandl = V[n];
+bits(datasize) operandh = V[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    Elem[result, e, esize] = Elem[zipped, 2*e+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Unzip vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UZP2 Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operandl = V[n];
+bits(datasize) operandh = V[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    Elem[result, e, esize] = Elem[zipped, 2*e+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Unzip vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UZP2 Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operandl = V[n];
+bits(datasize) operandh = V[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    Elem[result, e, esize] = Elem[zipped, 2*e+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Unzip vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UZP2 Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operandl = V[n];
+bits(datasize) operandh = V[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    Elem[result, e, esize] = Elem[zipped, 2*e+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Unzip vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UZP2 Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operandl = V[n];
+bits(datasize) operandh = V[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    Elem[result, e, esize] = Elem[zipped, 2*e+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Unzip vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UZP2 Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operandl = V[n];
+bits(datasize) operandh = V[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    Elem[result, e, esize] = Elem[zipped, 2*e+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Unzip vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UZP2 Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operandl = V[n];
+bits(datasize) operandh = V[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    Elem[result, e, esize] = Elem[zipped, 2*e+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Unzip vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UZP2 Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operandl = V[n];
+bits(datasize) operandh = V[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    Elem[result, e, esize] = Elem[zipped, 2*e+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Unzip vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UZP2 Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operandl = V[n];
+bits(datasize) operandh = V[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    Elem[result, e, esize] = Elem[zipped, 2*e+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Unzip vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UZP2 Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operandl = V[n];
+bits(datasize) operandh = V[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    Elem[result, e, esize] = Elem[zipped, 2*e+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Unzip vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UZP2 Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operandl = V[n];
+bits(datasize) operandh = V[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    Elem[result, e, esize] = Elem[zipped, 2*e+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Unzip vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UZP2 Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operandl = V[n];
+bits(datasize) operandh = V[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    Elem[result, e, esize] = Elem[zipped, 2*e+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Unzip vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UZP2 Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operandl = V[n];
+bits(datasize) operandh = V[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    Elem[result, e, esize] = Elem[zipped, 2*e+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Unzip vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UZP2 Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operandl = V[n];
+bits(datasize) operandh = V[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    Elem[result, e, esize] = Elem[zipped, 2*e+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Unzip vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UZP2 Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operandl = V[n];
+bits(datasize) operandh = V[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    Elem[result, e, esize] = Elem[zipped, 2*e+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Unzip vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UZP2 Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operandl = V[n];
+bits(datasize) operandh = V[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    Elem[result, e, esize] = Elem[zipped, 2*e+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Unzip vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UZP2 Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operandl = V[n];
+bits(datasize) operandh = V[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    Elem[result, e, esize] = Elem[zipped, 2*e+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Unzip vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UZP2 Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operandl = V[n];
+bits(datasize) operandh = V[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    Elem[result, e, esize] = Elem[zipped, 2*e+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Unzip vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UZP2 Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operandl = V[n];
+bits(datasize) operandh = V[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    Elem[result, e, esize] = Elem[zipped, 2*e+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Unzip vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

UZP2 Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operandl = V[n];
+bits(datasize) operandh = V[m];
+bits(datasize) result;
+
+bits(datasize*2) zipped = operandh:operandl;
+for e = 0 to elements-1
+    Elem[result, e, esize] = Elem[zipped, 2*e+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Transpose vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&FP registers, starting at zero, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.

+

A64 Instruction

TRN1 Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, 2*p+part, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, 2*p+part, esize];
+
+V[d] = result;
+
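As with the other permute instructions, the TRN1 pseudocode is shared by all arrangements; part is 0 for TRN1 (and would be 1 for TRN2). A hypothetical plain-Rust sketch of the element shuffle (the trn1_model name and slice signature are illustrative only, not stdarch code):

// Scalar model of the TRN1 Operation pseudocode above (illustration only).
// With `part == 0`, even-numbered elements of both sources are interleaved
// pairwise: a[0], b[0], a[2], b[2], ...
fn trn1_model<T: Copy>(a: &[T], b: &[T]) -> Vec<T> {
    assert_eq!(a.len(), b.len());
    let pairs = a.len() / 2;
    let part = 0;                     // 0 for TRN1, 1 for TRN2
    let mut result = Vec::with_capacity(a.len());
    for p in 0..pairs {
        result.push(a[2 * p + part]); // Elem[operand1, 2*p+part]
        result.push(b[2 * p + part]); // Elem[operand2, 2*p+part]
    }
    result
}

fn main() {
    let a = [0u8, 1, 2, 3, 4, 5, 6, 7];
    let b = [10u8, 11, 12, 13, 14, 15, 16, 17];
    assert_eq!(trn1_model(&a, &b), [0, 10, 2, 12, 4, 14, 6, 16]);
}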

Supported architectures

A64

Description

Transpose vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&FP registers, starting at zero, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.

+

A64 Instruction

TRN1 Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, 2*p+part, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, 2*p+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Transpose vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&FP registers, starting at zero, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.

+

A64 Instruction

TRN1 Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, 2*p+part, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, 2*p+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Transpose vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&FP registers, starting at zero, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.

+

A64 Instruction

TRN1 Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, 2*p+part, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, 2*p+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Transpose vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&FP registers, starting at zero, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.

+

A64 Instruction

TRN1 Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, 2*p+part, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, 2*p+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Transpose vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&FP registers, starting at zero, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.

+

A64 Instruction

TRN1 Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, 2*p+part, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, 2*p+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Transpose vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&FP registers, starting at zero, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.

+

A64 Instruction

TRN1 Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, 2*p+part, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, 2*p+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Transpose vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&FP registers, starting at zero, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.

+

A64 Instruction

TRN1 Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, 2*p+part, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, 2*p+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Transpose vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&FP registers, starting at zero, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.

+

A64 Instruction

TRN1 Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, 2*p+part, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, 2*p+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Transpose vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&FP registers, starting at zero, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.

+

A64 Instruction

TRN1 Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, 2*p+part, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, 2*p+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Transpose vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&FP registers, starting at zero, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.

+

A64 Instruction

TRN1 Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, 2*p+part, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, 2*p+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Transpose vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&FP registers, starting at zero, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.

+

A64 Instruction

TRN1 Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, 2*p+part, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, 2*p+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Transpose vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&FP registers, starting at zero, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.

+

A64 Instruction

TRN1 Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, 2*p+part, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, 2*p+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Transpose vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&FP registers, starting at zero, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.

+

A64 Instruction

TRN1 Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, 2*p+part, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, 2*p+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Transpose vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&FP registers, starting at zero, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.

+

A64 Instruction

TRN1 Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, 2*p+part, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, 2*p+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Transpose vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&FP registers, starting at zero, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.

+

A64 Instruction

TRN1 Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, 2*p+part, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, 2*p+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Transpose vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&FP registers, starting at zero, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.

+

A64 Instruction

TRN1 Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, 2*p+part, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, 2*p+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Transpose vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&FP registers, starting at zero, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.

+

A64 Instruction

TRN1 Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, 2*p+part, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, 2*p+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Transpose vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&FP registers, starting at zero, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.

+

A64 Instruction

TRN1 Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, 2*p+part, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, 2*p+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Transpose vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&FP registers, starting at zero, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.

+

A64 Instruction

TRN1 Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, 2*p+part, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, 2*p+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Transpose vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&FP registers, starting at zero, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.

+

A64 Instruction

TRN1 Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, 2*p+part, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, 2*p+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Transpose vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&FP registers, starting at zero, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.

+

A64 Instruction

TRN1 Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, 2*p+part, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, 2*p+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Transpose vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.

+

A64 Instruction

TRN2 Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, 2*p+part, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, 2*p+part, esize];
+
+V[d] = result;
+

Supported architectures

A64
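
The TRN2 entries run the same loop with `part = 1`, so both sources are read at the odd-numbered positions (2*p + 1) instead. A matching scalar sketch of the difference (hypothetical, for illustration only):

// Scalar model of TRN2: identical to TRN1 except that both sources are
// read at the odd positions, i.e. `part` in the pseudocode is 1.
fn trn2<const N: usize>(a: [u32; N], b: [u32; N]) -> [u32; N] {
    let mut result = [0u32; N];
    for p in 0..N / 2 {
        result[2 * p] = a[2 * p + 1];
        result[2 * p + 1] = b[2 * p + 1];
    }
    result
}

fn main() {
    // Shown with 4 lanes for brevity; the 8B/16B/4H/8H/2S/4S/2D forms only
    // change the lane count and width.
    let a = [10, 11, 12, 13];
    let b = [20, 21, 22, 23];
    assert_eq!(trn2(a, b), [11, 21, 13, 23]);
}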

Description

Transpose vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.

+

A64 Instruction

TRN2 Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, 2*p+part, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, 2*p+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Transpose vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.

+

A64 Instruction

TRN2 Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, 2*p+part, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, 2*p+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Transpose vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.

+

A64 Instruction

TRN2 Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, 2*p+part, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, 2*p+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Transpose vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.

+

A64 Instruction

TRN2 Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, 2*p+part, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, 2*p+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Transpose vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.

+

A64 Instruction

TRN2 Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, 2*p+part, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, 2*p+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Transpose vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.

+

A64 Instruction

TRN2 Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, 2*p+part, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, 2*p+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Transpose vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.

+

A64 Instruction

TRN2 Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, 2*p+part, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, 2*p+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Transpose vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.

+

A64 Instruction

TRN2 Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, 2*p+part, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, 2*p+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Transpose vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.

+

A64 Instruction

TRN2 Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, 2*p+part, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, 2*p+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Transpose vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.

+

A64 Instruction

TRN2 Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, 2*p+part, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, 2*p+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Transpose vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.

+

A64 Instruction

TRN2 Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, 2*p+part, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, 2*p+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Transpose vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.

+

A64 Instruction

TRN2 Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, 2*p+part, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, 2*p+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Transpose vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.

+

A64 Instruction

TRN2 Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, 2*p+part, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, 2*p+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Transpose vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.

+

A64 Instruction

TRN2 Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, 2*p+part, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, 2*p+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Transpose vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.

+

A64 Instruction

TRN2 Vd.2S,Vn.2S,Vm.2S
+

Argument Preparation

a → Vn.2S 
+b → Vm.2S

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, 2*p+part, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, 2*p+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Transpose vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.

+

A64 Instruction

TRN2 Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

a → Vn.4S 
+b → Vm.4S

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, 2*p+part, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, 2*p+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Transpose vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.

+

A64 Instruction

TRN2 Vd.2D,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, 2*p+part, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, 2*p+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Transpose vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.

+

A64 Instruction

TRN2 Vd.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, 2*p+part, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, 2*p+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Transpose vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.

+

A64 Instruction

TRN2 Vd.16B,Vn.16B,Vm.16B
+

Argument Preparation

a → Vn.16B 
+b → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, 2*p+part, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, 2*p+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Transpose vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.

+

A64 Instruction

TRN2 Vd.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, 2*p+part, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, 2*p+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Transpose vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.

+

A64 Instruction

TRN2 Vd.8H,Vn.8H,Vm.8H
+

Argument Preparation

a → Vn.8H 
+b → Vm.8H

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, 2*p+part, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, 2*p+part, esize];
+
+V[d] = result;
+

Supported architectures

A64

Description

Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBL Vd.8B,{Vn.16B},Vm.8B
+

Argument Preparation

Vn → Zeros(64):a 
+b → Vm

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
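
In this form the Argument Preparation zero-extends a 64-bit value into the 16-byte table register (`Vn → Zeros(64):a`), so indices 8..=15 read the zeroed upper half and indices of 16 or more are out of range; either way the looked-up byte is 0. A scalar sketch of the single-register lookup (a hypothetical model of the pseudocode, not the stdarch code):

// Scalar model of TBL with one table register (regs = 1): `table` holds the
// 16 table bytes and `indices` the 8 index bytes; out-of-range indices give 0.
fn tbl1(table: [u8; 16], indices: [u8; 8]) -> [u8; 8] {
    let mut result = [0u8; 8]; // `result = Zeros()` because is_tbl is true
    for (i, &idx) in indices.iter().enumerate() {
        if (idx as usize) < table.len() {
            result[i] = table[idx as usize];
        }
    }
    result
}

fn main() {
    // Table = Zeros(64):a: data in the low 8 bytes, zeros in the high 8.
    let mut table = [0u8; 16];
    table[..8].copy_from_slice(&[1, 2, 3, 4, 5, 6, 7, 8]);
    assert_eq!(tbl1(table, [0, 7, 8, 15, 16, 255, 3, 2]), [1, 8, 0, 0, 0, 0, 4, 3]);
}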

Description

Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBL Vd.8B,{Vn.16B},Vm.8B
+

Argument Preparation

Vn → Zeros(64):a 
+b → Vm

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBL Vd.8B,{Vn.16B},Vm.8B
+

Argument Preparation

Vn → Zeros(64):a 
+b → Vm

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise Insert if False. This instruction inserts each bit from the first source SIMD&FP register into the destination SIMD&FP register if the corresponding bit of the second source SIMD&FP register is 0, otherwise leaves the bit in the destination register unchanged.

+

A64 Instruction

MOVI Vtmp.8B,#8
+CMHS Vtmp.8B,Vm.8B,Vtmp.8B
+TBL Vtmp1.8B,{Vn.16B},Vm.8B
+BIF Vd.8B,Vtmp1.8B,Vtmp.8B
+

Argument Preparation

a → Vd 
+Vn → Zeros(64):b
+c → Vm

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1;
+bits(datasize) operand3;
+bits(datasize) operand4 = V[n];
+
+operand1 = V[d];
+operand3 = NOT(V[m]);
+
+V[d] = operand1 EOR ((operand1 EOR operand4) AND operand3);
+

Supported architectures

v7/A32/A64
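
This entry lists an instruction sequence rather than a single instruction (the Operation block above covers only the final BIF step): MOVI materialises the per-byte threshold 8, CMHS builds a mask of index bytes that are >= 8, TBL performs the lookup into the zero-extended 16-byte table, and BIF then copies the original destination byte back wherever the mask is set. The net effect is a lookup with 8-byte-table semantics, where an index of 8 or more leaves the destination byte unchanged. A scalar sketch of that combined behaviour (an interpretation of the listed sequence, not an authoritative implementation):

// Scalar model of the MOVI #8 / CMHS / TBL / BIF sequence: look up into an
// 8-byte table, keeping the old destination byte for indices >= 8.
fn tbx_8byte_table(dest: [u8; 8], table: [u8; 8], indices: [u8; 8]) -> [u8; 8] {
    let mut result = dest;
    for (i, &idx) in indices.iter().enumerate() {
        // CMHS against #8 selects the in-range bytes; BIF restores `dest`
        // for the rest.
        if idx < 8 {
            result[i] = table[idx as usize];
        }
    }
    result
}

fn main() {
    let dest = [0xAA; 8];
    let table = [1, 2, 3, 4, 5, 6, 7, 8];
    // Indices 9, 8 and 200 are out of range, so the old 0xAA bytes survive.
    assert_eq!(tbx_8byte_table(dest, table, [0, 9, 7, 8, 2, 200, 1, 3]),
               [1, 0xAA, 8, 0xAA, 3, 0xAA, 2, 4]);
}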

Description

Bitwise Insert if False. This instruction inserts each bit from the first source SIMD&FP register into the destination SIMD&FP register if the corresponding bit of the second source SIMD&FP register is 0, otherwise leaves the bit in the destination register unchanged.

+

A64 Instruction

MOVI Vtmp.8B,#8
+CMHS Vtmp.8B,Vm.8B,Vtmp.8B
+TBL Vtmp1.8B,{Vn.16B},Vm.8B
+BIF Vd.8B,Vtmp1.8B,Vtmp.8B
+

Argument Preparation

a → Vd 
+Vn → Zeros(64):b
+c → Vm

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1;
+bits(datasize) operand3;
+bits(datasize) operand4 = V[n];
+
+operand1 = V[d];
+operand3 = NOT(V[m]);
+
+V[d] = operand1 EOR ((operand1 EOR operand4) AND operand3);
+

Supported architectures

v7/A32/A64

Description

Bitwise Insert if False. This instruction inserts each bit from the first source SIMD&FP register into the destination SIMD&FP register if the corresponding bit of the second source SIMD&FP register is 0, otherwise leaves the bit in the destination register unchanged.

+

A64 Instruction

MOVI Vtmp.8B,#8
+CMHS Vtmp.8B,Vm.8B,Vtmp.8B
+TBL Vtmp1.8B,{Vn.16B},Vm.8B
+BIF Vd.8B,Vtmp1.8B,Vtmp.8B
+

Argument Preparation

a → Vd 
+Vn → Zeros(64):b
+c → Vm

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1;
+bits(datasize) operand3;
+bits(datasize) operand4 = V[n];
+
+operand1 = V[d];
+operand3 = NOT(V[m]);
+
+V[d] = operand1 EOR ((operand1 EOR operand4) AND operand3);
+

Supported architectures

v7/A32/A64

Description

Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBL Vd.8B,{Vn.16B},Vm.8B
+

Argument Preparation

Vn → a.val[1]:a.val[0] 
+b → Vm

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBL Vd.8B,{Vn.16B},Vm.8B
+

Argument Preparation

Vn → a.val[1]:a.val[0] 
+b → Vm

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBL Vd.8B,{Vn.16B},Vm.8B
+

Argument Preparation

Vn → a.val[1]:a.val[0] 
+b → Vm

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBL Vd.8B,{Vn.16B,Vn+1.16B},Vm.8B
+

Argument Preparation

Vn → a.val[1]:a.val[0] 
+Vn+1 → Zeros(64):a.val[2]
+b → Vm

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
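
With two table registers the pseudocode concatenates them into a 32-byte table (`table<128*i+127:128*i> = V[n]`, the register number wrapping modulo 32), so indices up to 16*regs - 1 = 31 are in range; in this particular form the second register is itself only half filled (`Zeros(64):a.val[2]`), so indices 24..=31 read zeros. A scalar sketch of the multi-register lookup (hypothetical model):

// Scalar model of TBL over `regs` consecutive table registers: the registers
// are concatenated lowest-bytes-first and indices >= 16*regs yield 0.
fn tbl_multi(regs: &[[u8; 16]], indices: [u8; 8]) -> [u8; 8] {
    let table: Vec<u8> = regs.iter().flatten().copied().collect();
    let mut result = [0u8; 8];
    for (i, &idx) in indices.iter().enumerate() {
        if (idx as usize) < table.len() {
            result[i] = table[idx as usize];
        }
    }
    result
}

fn main() {
    let mut v0 = [0u8; 16];
    let mut v1 = [0u8; 16];
    for b in 0..16 {
        v0[b] = b as u8;       // first register: bytes 0..=15 of the table
        v1[b] = 100 + b as u8; // second register: bytes 16..=31
    }
    // Index 3 hits v0, index 18 hits byte 2 of v1, index 32 is out of range.
    assert_eq!(tbl_multi(&[v0, v1], [3, 18, 32, 0, 0, 0, 0, 0]),
               [3, 102, 0, 0, 0, 0, 0, 0]);
}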

Description

Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBL Vd.8B,{Vn.16B,Vn+1.16B},Vm.8B
+

Argument Preparation

Vn → a.val[1]:a.val[0] 
+Vn+1 → Zeros(64):a.val[2]
+b → Vm

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBL Vd.8B,{Vn.16B,Vn+1.16B},Vm.8B
+

Argument Preparation

Vn → a.val[1]:a.val[0] 
+Vn+1 → Zeros(64):a.val[2]
+b → Vm

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBL Vd.8B,{Vn.16B,Vn+1.16B},Vm.8B
+

Argument Preparation

Vn → a.val[1]:a.val[0] 
+Vn+1 → a.val[3]:a.val[2]
+b → Vm

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBL Vd.8B,{Vn.16B,Vn+1.16B},Vm.8B
+

Argument Preparation

Vn → a.val[1]:a.val[0] 
+Vn+1 → a.val[3]:a.val[2]
+b → Vm

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBL Vd.8B,{Vn.16B,Vn+1.16B},Vm.8B
+

Argument Preparation

Vn → a.val[1]:a.val[0] 
+Vn+1 → a.val[3]:a.val[2]
+b → Vm

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Table vector lookup extension. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the existing value in the vector element of the destination register is left unchanged. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBX Vd.8B,{Vn.16B},Vm.8B
+

Argument Preparation

a → Vd 
+Vn → b.val[1]:b.val[0]
+c → Vm

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
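
TBX differs from TBL only in the initial value of `result`: here `is_tbl` is false, so `result` starts as the previous contents of Vd and out-of-range indices leave those bytes untouched instead of zeroing them. A scalar sketch of that difference for the single-register form (hypothetical model):

// Scalar model of TBX with one table register: like TBL, but bytes whose
// index is out of range keep the previous destination value.
fn tbx1(dest: [u8; 8], table: [u8; 16], indices: [u8; 8]) -> [u8; 8] {
    let mut result = dest; // `result = V[d]` because is_tbl is false
    for (i, &idx) in indices.iter().enumerate() {
        if (idx as usize) < table.len() {
            result[i] = table[idx as usize];
        }
    }
    result
}

fn main() {
    let dest = [0xEE; 8];
    let mut table = [0u8; 16];
    for b in 0..16 {
        table[b] = b as u8 + 1;
    }
    // Index 16 is out of range, so the destination byte 0xEE survives there.
    assert_eq!(tbx1(dest, table, [0, 15, 16, 2, 0, 0, 0, 0]),
               [1, 16, 0xEE, 3, 1, 1, 1, 1]);
}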

Description

Table vector lookup extension. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the existing value in the vector element of the destination register is left unchanged. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBX Vd.8B,{Vn.16B},Vm.8B
+

Argument Preparation

a → Vd 
+Vn → b.val[1]:b.val[0]
+c → Vm

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Table vector lookup extension. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the existing value in the vector element of the destination register is left unchanged. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBX Vd.8B,{Vn.16B},Vm.8B
+

Argument Preparation

a → Vd 
+Vn → b.val[1]:b.val[0]
+c → Vm

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Bitwise Insert if False. This instruction inserts each bit from the first source SIMD&FP register into the destination SIMD&FP register if the corresponding bit of the second source SIMD&FP register is 0, otherwise leaves the bit in the destination register unchanged.

+

A64 Instruction

MOVI Vtmp.8B,#24
+CMHS Vtmp.8B,Vm.8B,Vtmp.8B
+TBL Vtmp1.8B,{Vn.16B,Vn+1.16B},Vm.8B
+BIF Vd.8B,Vtmp1.8B,Vtmp.8B
+

Argument Preparation

a → Vd 
+Vn → b.val[1]:b.val[0]
+Vn+1 → Zeros(64):b.val[2]
+c → Vm

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1;
+bits(datasize) operand3;
+bits(datasize) operand4 = V[n];
+
+operand1 = V[d];
+operand3 = NOT(V[m]);
+
+V[d] = operand1 EOR ((operand1 EOR operand4) AND operand3);
+

Supported architectures

v7/A32/A64
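
This sequence reuses the MOVI/CMHS/TBL/BIF trick from the 8-byte case above, with the CMHS threshold raised to 24: the table is built from three 64-bit source values (`b.val[0]`, `b.val[1]` and `b.val[2]`, the upper half of Vn+1 being zero-filled), i.e. 24 usable bytes, so any index of 24 or more falls back to the original destination byte rather than reading the zeroed tail of Vn+1.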

Description

Bitwise Insert if False. This instruction inserts each bit from the first source SIMD&FP register into the destination SIMD&FP register if the corresponding bit of the second source SIMD&FP register is 0, otherwise leaves the bit in the destination register unchanged.

+

A64 Instruction

MOVI Vtmp.8B,#24
+CMHS Vtmp.8B,Vm.8B,Vtmp.8B
+TBL Vtmp1.8B,{Vn.16B,Vn+1.16B},Vm.8B
+BIF Vd.8B,Vtmp1.8B,Vtmp.8B
+

Argument Preparation

a → Vd 
+Vn → b.val[1]:b.val[0]
+Vn+1 → Zeros(64):b.val[2]
+c → Vm

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1;
+bits(datasize) operand3;
+bits(datasize) operand4 = V[n];
+
+operand1 = V[d];
+operand3 = NOT(V[m]);
+
+V[d] = operand1 EOR ((operand1 EOR operand4) AND operand3);
+

Supported architectures

v7/A32/A64

Description

Bitwise Insert if False. This instruction inserts each bit from the first source SIMD&FP register into the destination SIMD&FP register if the corresponding bit of the second source SIMD&FP register is 0, otherwise leaves the bit in the destination register unchanged.

+

A64 Instruction

MOVI Vtmp.8B,#24
+CMHS Vtmp.8B,Vm.8B,Vtmp.8B
+TBL Vtmp1.8B,{Vn.16B,Vn+1.16B},Vm.8B
+BIF Vd.8B,Vtmp1.8B,Vtmp.8B
+

Argument Preparation

a → Vd 
+Vn → b.val[1]:b.val[0]
+Vn+1 → Zeros(64):b.val[2]
+c → Vm

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1;
+bits(datasize) operand3;
+bits(datasize) operand4 = V[n];
+
+operand1 = V[d];
+operand3 = NOT(V[m]);
+
+V[d] = operand1 EOR ((operand1 EOR operand4) AND operand3);
+

Supported architectures

v7/A32/A64

Description

Table vector lookup extension. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the existing value in the vector element of the destination register is left unchanged. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBX Vd.8B,{Vn.16B,Vn+1.16B},Vm.8B
+

Argument Preparation

a → Vd 
+Vn → b.val[1]:b.val[0]
+Vn+1 → b.val[3]:b.val[2]
+c → Vm

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
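
For illustration only: here the four 64-bit table registers (b.val[0..3]) are packed into two 16-byte registers, so a plain two-register TBX covers the full 0..32 index range and no masking sequence is needed. A minimal Rust sketch, assuming this entry corresponds to the `vtbx4_u8` intrinsic in `core::arch::aarch64` (that mapping, the function name `tbx4_sketch`, and the example values are illustrative assumptions):

// Illustrative sketch only: four 64-bit table registers packed into two
// 16-byte registers; indices >= 32 leave the destination byte unchanged.
#[cfg(target_arch = "aarch64")]
unsafe fn tbx4_sketch() {
    use core::arch::aarch64::*;

    let a:   [u8; 8] = [1, 2, 3, 4, 5, 6, 7, 8];         // destination / fallback bytes
    let idx: [u8; 8] = [0, 31, 32, 3, 200, 16, 24, 9];   // 32 and 200 are out of range
    let mut tab = [0u8; 32];
    for i in 0..32 { tab[i] = 64 + i as u8; }

    let b = uint8x8x4_t(
        vld1_u8(tab.as_ptr()),
        vld1_u8(tab.as_ptr().add(8)),
        vld1_u8(tab.as_ptr().add(16)),
        vld1_u8(tab.as_ptr().add(24)),
    );
    let r = vtbx4_u8(vld1_u8(a.as_ptr()), b, vld1_u8(idx.as_ptr()));

    let mut out = [0u8; 8];
    vst1_u8(out.as_mut_ptr(), r);
    assert_eq!(out, [64, 95, 3, 67, 5, 80, 88, 73]);
}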

Description

Table vector lookup extension. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the existing value in the vector element of the destination register is left unchanged. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBX Vd.8B,{Vn.16B,Vn+1.16B},Vm.8B
+

Argument Preparation

a → Vd 
+Vn → b.val[1]:b.val[0]
+Vn+1 → b.val[3]:b.val[2]
+c → Vm

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Table vector lookup extension. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the existing value in the vector element of the destination register is left unchanged. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBX Vd.8B,{Vn.16B,Vn+1.16B},Vm.8B
+

Argument Preparation

a → Vd 
+Vn → b.val[1]:b.val[0]
+Vn+1 → b.val[3]:b.val[2]
+c → Vm

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBL Vd.8B,{Vn.16B},Vm.8B
+

Argument Preparation

t → Vn.16B 
+idx → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

A64

Description

Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBL Vd.16B,{Vn.16B},Vm.16B
+

Argument Preparation

t → Vn.16B 
+idx → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

A64
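
For illustration only: a minimal Rust sketch of the single-register TBL form documented in the two entries above, assuming they correspond to `vqtbl1_u8` (8-byte index/result vector) and `vqtbl1q_u8` (16-byte variant) in `core::arch::aarch64`; the function name `qtbl1_sketch` and the example values are illustrative assumptions.

// Illustrative sketch only: a single 16-byte table; indices >= 16 return 0.
#[cfg(target_arch = "aarch64")]
unsafe fn qtbl1_sketch() {
    use core::arch::aarch64::*;

    let mut tab = [0u8; 16];
    for i in 0..16 { tab[i] = 100 + i as u8; }
    let idx: [u8; 8] = [0, 3, 15, 16, 255, 7, 1, 2];   // 16 and 255 are out of range

    let t = vld1q_u8(tab.as_ptr());
    let r = vqtbl1_u8(t, vld1_u8(idx.as_ptr()));        // TBL Vd.8B,{Vn.16B},Vm.8B

    let mut out = [0u8; 8];
    vst1_u8(out.as_mut_ptr(), r);
    // Out-of-range lanes come back as 0.
    assert_eq!(out, [100, 103, 115, 0, 0, 107, 101, 102]);
}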

Description

Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBL Vd.8B,{Vn.16B},Vm.8B
+

Argument Preparation

t → Vn.16B 
+idx → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

A64

Description

Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBL Vd.16B,{Vn.16B},Vm.16B
+

Argument Preparation

t → Vn.16B 
+idx → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

A64

Description

Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBL Vd.8B,{Vn.16B},Vm.8B
+

Argument Preparation

t → Vn.16B 
+idx → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

A64

Description

Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBL Vd.16B,{Vn.16B},Vm.16B
+

Argument Preparation

t → Vn.16B 
+idx → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

A64

Description

Table vector lookup extension. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the existing value in the vector element of the destination register is left unchanged. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBX Vd.8B,{Vn.16B},Vm.8B
+

Argument Preparation

a → Vd.8B 
+t → Vn.16B
+idx → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

A64

Description

Table vector lookup extension. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the existing value in the vector element of the destination register is left unchanged. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBX Vd.16B,{Vn.16B},Vm.16B
+

Argument Preparation

a → Vd.16B 
+t → Vn.16B
+idx → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

A64
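
For illustration only: the two entries above are the extension (TBX) counterparts of the single-register lookup, where out-of-range indices keep the existing destination byte instead of producing 0. A minimal Rust sketch, assuming they correspond to `vqtbx1_u8` / `vqtbx1q_u8` in `core::arch::aarch64`; the function name `qtbx1_sketch` and the example values are illustrative assumptions.

// Illustrative sketch only: a single 16-byte table with extension semantics;
// indices >= 16 keep the corresponding byte of `a`.
#[cfg(target_arch = "aarch64")]
unsafe fn qtbx1_sketch() {
    use core::arch::aarch64::*;

    let a:   [u8; 8] = [1, 2, 3, 4, 5, 6, 7, 8];
    let idx: [u8; 8] = [0, 3, 15, 16, 255, 7, 1, 2];   // 16 and 255 are out of range
    let mut tab = [0u8; 16];
    for i in 0..16 { tab[i] = 100 + i as u8; }

    let r = vqtbx1_u8(vld1_u8(a.as_ptr()), vld1q_u8(tab.as_ptr()), vld1_u8(idx.as_ptr()));

    let mut out = [0u8; 8];
    vst1_u8(out.as_mut_ptr(), r);
    assert_eq!(out, [100, 103, 115, 4, 5, 107, 101, 102]);
}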

Description

Table vector lookup extension. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the existing value in the vector element of the destination register is left unchanged. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBX Vd.8B,{Vn.16B},Vm.8B
+

Argument Preparation

a → Vd.8B 
+t → Vn.16B
+idx → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

A64

Description

Table vector lookup extension. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the existing value in the vector element of the destination register is left unchanged. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBX Vd.16B,{Vn.16B},Vm.16B
+

Argument Preparation

a → Vd.16B 
+t → Vn.16B
+idx → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

A64

Description

Table vector lookup extension. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the existing value in the vector element of the destination register is left unchanged. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBX Vd.8B,{Vn.16B},Vm.8B
+

Argument Preparation

a → Vd.8B 
+t → Vn.16B
+idx → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

A64

Description

Table vector lookup extension. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the existing value in the vector element of the destination register is left unchanged. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBX Vd.16B,{Vn.16B},Vm.16B
+

Argument Preparation

a → Vd.16B 
+t → Vn.16B
+idx → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

A64

Description

Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBL Vd.8B,{Vn.16B - Vn+1.16B},Vm.8B
+

Argument Preparation

t.val[0] → Vn.16B 
+t.val[1] → Vn+1.16B
+idx → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

A64

Description

Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBL Vd.16B,{Vn.16B - Vn+1.16B},Vm.16B
+

Argument Preparation

t.val[0] → Vn.16B 
+t.val[1] → Vn+1.16B
+idx → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

A64
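
For illustration only: a minimal Rust sketch of the two-register (32-byte table) TBL form in the entries above, assuming they correspond to `vqtbl2_u8` / `vqtbl2q_u8` in `core::arch::aarch64`; the function name `qtbl2_sketch` and the example values are illustrative assumptions.

// Illustrative sketch only: a 32-byte table held in a uint8x16x2_t; indices
// >= 32 return 0.
#[cfg(target_arch = "aarch64")]
unsafe fn qtbl2_sketch() {
    use core::arch::aarch64::*;

    let mut tab = [0u8; 32];
    for i in 0..32 { tab[i] = i as u8; }
    let idx: [u8; 16] = [0, 1, 2, 3, 16, 17, 30, 31, 32, 33, 255, 5, 6, 7, 8, 9];

    let t = uint8x16x2_t(vld1q_u8(tab.as_ptr()), vld1q_u8(tab.as_ptr().add(16)));
    let r = vqtbl2q_u8(t, vld1q_u8(idx.as_ptr()));   // TBL Vd.16B,{Vn.16B - Vn+1.16B},Vm.16B

    let mut out = [0u8; 16];
    vst1q_u8(out.as_mut_ptr(), r);
    // Lanes whose index is >= 32 (32, 33, 255) come back as 0.
    assert_eq!(out, [0, 1, 2, 3, 16, 17, 30, 31, 0, 0, 0, 5, 6, 7, 8, 9]);
}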

Description

Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBL Vd.8B,{Vn.16B - Vn+1.16B},Vm.8B
+

Argument Preparation

t.val[0] → Vn.16B 
+t.val[1] → Vn+1.16B
+idx → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

A64

Description

Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBL Vd.16B,{Vn.16B - Vn+1.16B},Vm.16B
+

Argument Preparation

t.val[0] → Vn.16B 
+t.val[1] → Vn+1.16B
+idx → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

A64

Description

Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBL Vd.8B,{Vn.16B - Vn+1.16B},Vm.8B
+

Argument Preparation

t.val[0] → Vn.16B 
+t.val[1] → Vn+1.16B
+idx → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

A64

Description

Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBL Vd.16B,{Vn.16B - Vn+1.16B},Vm.16B
+

Argument Preparation

t.val[0] → Vn.16B 
+t.val[1] → Vn+1.16B
+idx → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

A64

Description

Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBL Vd.8B,{Vn.16B - Vn+2.16B},Vm.8B
+

Argument Preparation

t.val[0] → Vn.16B 
+t.val[1] → Vn+1.16B
+t.val[2] → Vn+2.16B
+idx → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

A64

Description

Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBL Vd.16B,{Vn.16B - Vn+2.16B},Vm.16B
+

Argument Preparation

t.val[0] → Vn.16B 
+t.val[1] → Vn+1.16B
+t.val[2] → Vn+2.16B
+idx → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

A64
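
For illustration only: a minimal Rust sketch of the three-register (48-byte table) TBL form in the entries above, assuming they correspond to `vqtbl3_u8` / `vqtbl3q_u8` in `core::arch::aarch64`; the function name `qtbl3_sketch` and the example values are illustrative assumptions.

// Illustrative sketch only: a 48-byte table held in a uint8x16x3_t; indices
// >= 48 return 0.
#[cfg(target_arch = "aarch64")]
unsafe fn qtbl3_sketch() {
    use core::arch::aarch64::*;

    let mut tab = [0u8; 48];
    for i in 0..48 { tab[i] = 100 + i as u8; }
    let idx: [u8; 8] = [0, 16, 32, 47, 48, 255, 1, 2];   // 48 and 255 are out of range

    let t = uint8x16x3_t(
        vld1q_u8(tab.as_ptr()),
        vld1q_u8(tab.as_ptr().add(16)),
        vld1q_u8(tab.as_ptr().add(32)),
    );
    let r = vqtbl3_u8(t, vld1_u8(idx.as_ptr()));

    let mut out = [0u8; 8];
    vst1_u8(out.as_mut_ptr(), r);
    assert_eq!(out, [100, 116, 132, 147, 0, 0, 101, 102]);
}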

Description

Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBL Vd.8B,{Vn.16B - Vn+2.16B},Vm.8B
+

Argument Preparation

t.val[0] → Vn.16B 
+t.val[1] → Vn+1.16B
+t.val[2] → Vn+2.16B
+idx → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

A64

Description

Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBL Vd.16B,{Vn.16B - Vn+2.16B},Vm.16B
+

Argument Preparation

t.val[0] → Vn.16B 
+t.val[1] → Vn+1.16B
+t.val[2] → Vn+2.16B
+idx → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

A64

Description

Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBL Vd.8B,{Vn.16B - Vn+2.16B},Vm.8B
+

Argument Preparation

t.val[0] → Vn.16B 
+t.val[1] → Vn+1.16B
+t.val[2] → Vn+2.16B
+idx → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

A64

Description

Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBL Vd.16B,{Vn.16B - Vn+2.16B},Vm.16B
+

Argument Preparation

t.val[0] → Vn.16B 
+t.val[1] → Vn+1.16B
+t.val[2] → Vn+2.16B
+idx → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

A64

Description

Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBL Vd.8B,{Vn.16B - Vn+3.16B},Vm.8B
+

Argument Preparation

t.val[0] → Vn.16B 
+t.val[1] → Vn+1.16B
+t.val[2] → Vn+2.16B
+t.val[3] → Vn+3.16B
+idx → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

A64

Description

Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBL Vd.16B,{Vn.16B - Vn+3.16B},Vm.16B
+

Argument Preparation

t.val[0] → Vn.16B 
+t.val[1] → Vn+1.16B
+t.val[2] → Vn+2.16B
+t.val[3] → Vn+3.16B
+idx → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

A64
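
For illustration only: a minimal Rust sketch of the four-register (64-byte table, the maximum) TBL form in the entries above, assuming they correspond to `vqtbl4_u8` / `vqtbl4q_u8` in `core::arch::aarch64`; the function name `qtbl4_sketch` and the example values are illustrative assumptions.

// Illustrative sketch only: a 64-byte table held in a uint8x16x4_t; indices
// >= 64 return 0.
#[cfg(target_arch = "aarch64")]
unsafe fn qtbl4_sketch() {
    use core::arch::aarch64::*;

    let mut tab = [0u8; 64];
    for i in 0..64 { tab[i] = 100 + i as u8; }
    let idx: [u8; 16] = [0, 15, 16, 31, 32, 47, 48, 63, 64, 65, 255, 1, 2, 3, 4, 5];

    let t = uint8x16x4_t(
        vld1q_u8(tab.as_ptr()),
        vld1q_u8(tab.as_ptr().add(16)),
        vld1q_u8(tab.as_ptr().add(32)),
        vld1q_u8(tab.as_ptr().add(48)),
    );
    let r = vqtbl4q_u8(t, vld1q_u8(idx.as_ptr()));

    let mut out = [0u8; 16];
    vst1q_u8(out.as_mut_ptr(), r);
    // Lanes with index 64, 65 or 255 are zeroed; everything else reads the table.
    assert_eq!(out, [100, 115, 116, 131, 132, 147, 148, 163, 0, 0, 0, 101, 102, 103, 104, 105]);
}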

Description

Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBL Vd.8B,{Vn.16B - Vn+3.16B},Vm.8B
+

Argument Preparation

t.val[0] → Vn.16B 
+t.val[1] → Vn+1.16B
+t.val[2] → Vn+2.16B
+t.val[3] → Vn+3.16B
+idx → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

A64

Description

Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBL Vd.16B,{Vn.16B - Vn+3.16B},Vm.16B
+

Argument Preparation

t.val[0] → Vn.16B 
+t.val[1] → Vn+1.16B
+t.val[2] → Vn+2.16B
+t.val[3] → Vn+3.16B
+idx → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

A64

Description

Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBL Vd.8B,{Vn.16B - Vn+3.16B},Vm.8B
+

Argument Preparation

t.val[0] → Vn.16B 
+t.val[1] → Vn+1.16B
+t.val[2] → Vn+2.16B
+t.val[3] → Vn+3.16B
+idx → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

A64

Description

Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBL Vd.16B,{Vn.16B - Vn+3.16B},Vm.16B
+

Argument Preparation

t.val[0] → Vn.16B 
+t.val[1] → Vn+1.16B
+t.val[2] → Vn+2.16B
+t.val[3] → Vn+3.16B
+idx → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

A64

Description

Table vector lookup extension. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the existing value in the vector element of the destination register is left unchanged. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBX Vd.8B,{Vn.16B - Vn+1.16B},Vm.8B
+

Argument Preparation

a → Vd.8B 
+t.val[0] → Vn.16B
+t.val[1] → Vn+1.16B
+idx → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

A64

Description

Table vector lookup extension. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the existing value in the vector element of the destination register is left unchanged. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBX Vd.16B,{Vn.16B - Vn+1.16B},Vm.16B
+

Argument Preparation

a → Vd.16B 
+t.val[0] → Vn.16B
+t.val[1] → Vn+1.16B
+idx → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

A64
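
For illustration only: a minimal Rust sketch of the two-register (32-byte table) TBX form in the entries above, assuming they correspond to `vqtbx2_u8` / `vqtbx2q_u8` in `core::arch::aarch64`; the function name `qtbx2_sketch` and the example values are illustrative assumptions.

// Illustrative sketch only: a 32-byte table with extension semantics; indices
// >= 32 keep the corresponding byte of `a`.
#[cfg(target_arch = "aarch64")]
unsafe fn qtbx2_sketch() {
    use core::arch::aarch64::*;

    let a:   [u8; 8] = [1, 2, 3, 4, 5, 6, 7, 8];
    let idx: [u8; 8] = [0, 16, 31, 32, 255, 5, 6, 7];   // 32 and 255 are out of range
    let mut tab = [0u8; 32];
    for i in 0..32 { tab[i] = 100 + i as u8; }

    let t = uint8x16x2_t(vld1q_u8(tab.as_ptr()), vld1q_u8(tab.as_ptr().add(16)));
    let r = vqtbx2_u8(vld1_u8(a.as_ptr()), t, vld1_u8(idx.as_ptr()));

    let mut out = [0u8; 8];
    vst1_u8(out.as_mut_ptr(), r);
    assert_eq!(out, [100, 116, 131, 4, 5, 105, 106, 107]);
}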

Description

Table vector lookup extension. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the existing value in the vector element of the destination register is left unchanged. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBX Vd.8B,{Vn.16B - Vn+1.16B},Vm.8B
+

Argument Preparation

a → Vd.8B 
+t.val[0] → Vn.16B
+t.val[1] → Vn+1.16B
+idx → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

A64

Description

Table vector lookup extension. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the existing value in the vector element of the destination register is left unchanged. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBX Vd.16B,{Vn.16B - Vn+1.16B},Vm.16B
+

Argument Preparation

a → Vd.16B 
+t.val[0] → Vn.16B
+t.val[1] → Vn+1.16B
+idx → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

A64

Description

Table vector lookup extension. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the existing value in the vector element of the destination register is left unchanged. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBX Vd.8B,{Vn.16B - Vn+1.16B},Vm.8B
+

Argument Preparation

a → Vd.8B 
+t.val[0] → Vn.16B
+t.val[1] → Vn+1.16B
+idx → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

A64

Description

Table vector lookup extension. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the existing value in the vector element of the destination register is left unchanged. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBX Vd.16B,{Vn.16B - Vn+1.16B},Vm.16B
+

Argument Preparation

a → Vd.16B 
+t.val[0] → Vn.16B
+t.val[1] → Vn+1.16B
+idx → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

A64

Description

Table vector lookup extension. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the existing value in the vector element of the destination register is left unchanged. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBX Vd.8B,{Vn.16B - Vn+2.16B},Vm.8B
+

Argument Preparation

a → Vd.8B 
+t.val[0] → Vn.16B
+t.val[1] → Vn+1.16B
+t.val[2] → Vn+2.16B
+idx → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

A64

Description

Table vector lookup extension. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the existing value in the vector element of the destination register is left unchanged. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBX Vd.16B,{Vn.16B - Vn+2.16B},Vm.16B
+

Argument Preparation

a → Vd.16B 
+t.val[0] → Vn.16B
+t.val[1] → Vn+1.16B
+t.val[2] → Vn+2.16B
+idx → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

A64
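
For illustration only: a minimal Rust sketch of the three-register (48-byte table) TBX form in the entries above, assuming they correspond to `vqtbx3_u8` / `vqtbx3q_u8` in `core::arch::aarch64`; the function name `qtbx3_sketch` and the example values are illustrative assumptions.

// Illustrative sketch only: a 48-byte table with extension semantics; indices
// >= 48 keep the corresponding byte of `a`.
#[cfg(target_arch = "aarch64")]
unsafe fn qtbx3_sketch() {
    use core::arch::aarch64::*;

    let a:   [u8; 8] = [1, 2, 3, 4, 5, 6, 7, 8];
    let idx: [u8; 8] = [0, 16, 32, 47, 48, 255, 1, 2];   // 48 and 255 are out of range
    let mut tab = [0u8; 48];
    for i in 0..48 { tab[i] = 100 + i as u8; }

    let t = uint8x16x3_t(
        vld1q_u8(tab.as_ptr()),
        vld1q_u8(tab.as_ptr().add(16)),
        vld1q_u8(tab.as_ptr().add(32)),
    );
    let r = vqtbx3_u8(vld1_u8(a.as_ptr()), t, vld1_u8(idx.as_ptr()));

    let mut out = [0u8; 8];
    vst1_u8(out.as_mut_ptr(), r);
    assert_eq!(out, [100, 116, 132, 147, 5, 6, 101, 102]);
}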

Description

Table vector lookup extension. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the existing value in the vector element of the destination register is left unchanged. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBX Vd.8B,{Vn.16B - Vn+2.16B},Vm.8B
+

Argument Preparation

a → Vd.8B 
+t.val[0] → Vn.16B
+t.val[1] → Vn+1.16B
+t.val[2] → Vn+2.16B
+idx → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

A64

Description

Table vector lookup extension. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the existing value in the vector element of the destination register is left unchanged. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBX Vd.16B,{Vn.16B - Vn+2.16B},Vm.16B
+

Argument Preparation

a → Vd.16B 
+t.val[0] → Vn.16B
+t.val[1] → Vn+1.16B
+t.val[2] → Vn+2.16B
+idx → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

A64

Description

Table vector lookup extension. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the existing value in the vector element of the destination register is left unchanged. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBX Vd.8B,{Vn.16B - Vn+2.16B},Vm.8B
+

Argument Preparation

a → Vd.8B 
+t.val[0] → Vn.16B
+t.val[1] → Vn+1.16B
+t.val[2] → Vn+2.16B
+idx → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

A64

Description

Table vector lookup extension. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the existing value in the vector element of the destination register is left unchanged. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBX Vd.16B,{Vn.16B - Vn+2.16B},Vm.16B
+

Argument Preparation

a → Vd.16B 
+t.val[0] → Vn.16B
+t.val[1] → Vn+1.16B
+t.val[2] → Vn+2.16B
+idx → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

A64

Description

Table vector lookup extension. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the existing value in the vector element of the destination register is left unchanged. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBX Vd.8B,{Vn.16B - Vn+3.16B},Vm.8B
+

Argument Preparation

a → Vd.8B 
+t.val[0] → Vn.16B
+t.val[1] → Vn+1.16B
+t.val[2] → Vn+2.16B
+t.val[3] → Vn+3.16B
+idx → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

A64

Description

Table vector lookup extension. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the existing value in the vector element of the destination register is left unchanged. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBX Vd.16B,{Vn.16B - Vn+3.16B},Vm.16B
+

Argument Preparation

a → Vd.16B 
+t.val[0] → Vn.16B
+t.val[1] → Vn+1.16B
+t.val[2] → Vn+2.16B
+t.val[3] → Vn+3.16B
+idx → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

A64

Description

Table vector lookup extension. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the existing value in the vector element of the destination register is left unchanged. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBX Vd.8B,{Vn.16B - Vn+3.16B},Vm.8B
+

Argument Preparation

a → Vd.8B 
+t.val[0] → Vn.16B
+t.val[1] → Vn+1.16B
+t.val[2] → Vn+2.16B
+t.val[3] → Vn+3.16B
+idx → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

A64

Description

Table vector lookup extension. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the existing value in the vector element of the destination register is left unchanged. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBX Vd.16B,{Vn.16B - Vn+3.16B},Vm.16B
+

Argument Preparation

a → Vd.16B 
+t.val[0] → Vn.16B
+t.val[1] → Vn+1.16B
+t.val[2] → Vn+2.16B
+t.val[3] → Vn+3.16B
+idx → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

A64

Description

Table vector lookup extension. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the existing value in the vector element of the destination register is left unchanged. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBX Vd.8B,{Vn.16B - Vn+3.16B},Vm.8B
+

Argument Preparation

a → Vd.8B 
+t.val[0] → Vn.16B
+t.val[1] → Vn+1.16B
+t.val[2] → Vn+2.16B
+t.val[3] → Vn+3.16B
+idx → Vm.8B

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

A64

Description

Table vector lookup extension. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the existing value in the vector element of the destination register is left unchanged. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

+

A64 Instruction

TBX Vd.16B,{Vn.16B - Vn+3.16B},Vm.16B
+

Argument Preparation

a → Vd.16B 
+t.val[0] → Vn.16B
+t.val[1] → Vn+1.16B
+t.val[2] → Vn+2.16B
+t.val[3] → Vn+3.16B
+idx → Vm.16B

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) indices = V[m];
+bits(128*regs) table = Zeros();
+bits(datasize) result;
+integer index;
+
+// Create table from registers
+for i = 0 to regs-1
+    table<128*i+127:128*i> = V[n];
+    n = (n + 1) MOD 32;
+
+result = if is_tbl then Zeros() else V[d];
+for i = 0 to elements-1
+    index = UInt(Elem[indices, i, 8]);
+    if index < 16 * regs then
+        Elem[result, i, 8] = Elem[table, index, 8];
+
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Move vector element to general-purpose register. This instruction reads the unsigned integer from the source SIMD&FP register, zero-extends it to form a 32-bit or 64-bit value, and writes the result to the destination general-purpose register.

+

A64 Instruction

UMOV Rd,Vn.B[lane]
+

Argument Preparation

v → Vn.8B 
+0 <= lane <= 7

Results

Rd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(idxdsize) operand = V[n];
+
+X[d] = ZeroExtend(Elem[operand, index, esize], datasize);
+

Supported architectures

v7/A32/A64
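
To make the zero-extension concrete, here is a small plain-Rust model of the Operation block above for the byte form (the name umov_b and the array representation are assumptions for the example):

// Model of UMOV Rd, Vn.B[lane]: zero-extend byte `lane` of an 8-byte vector.
fn umov_b(v: [u8; 8], lane: usize) -> u64 {
    u64::from(v[lane]) // ZeroExtend(Elem[operand, index, 8], 64)
}

fn main() {
    let v = [0x80u8, 1, 2, 3, 4, 5, 6, 7];
    assert_eq!(umov_b(v, 0), 0x80); // the top bit is not propagated
}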

Description

Unsigned Move vector element to general-purpose register. This instruction reads the unsigned integer from the source SIMD&FP register, zero-extends it to form a 32-bit or 64-bit value, and writes the result to the destination general-purpose register.

+

A64 Instruction

UMOV Rd,Vn.H[lane]
+

Argument Preparation

v → Vn.4H 
+0 <= lane <= 3

Results

Rd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(idxdsize) operand = V[n];
+
+X[d] = ZeroExtend(Elem[operand, index, esize], datasize);
+

Supported architectures

v7/A32/A64

Description

Unsigned Move vector element to general-purpose register. This instruction reads the unsigned integer from the source SIMD&FP register, zero-extends it to form a 32-bit or 64-bit value, and writes the result to the destination general-purpose register.

+

A64 Instruction

UMOV Rd,Vn.S[lane]
+

Argument Preparation

v → Vn.2S 
+0 <= lane <= 1

Results

Rd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(idxdsize) operand = V[n];
+
+X[d] = ZeroExtend(Elem[operand, index, esize], datasize);
+

Supported architectures

v7/A32/A64

Description

Unsigned Move vector element to general-purpose register. This instruction reads the unsigned integer from the source SIMD&FP register, zero-extends it to form a 32-bit or 64-bit value, and writes the result to the destination general-purpose register.

+

A64 Instruction

UMOV Rd,Vn.D[lane]
+

Argument Preparation

v → Vn.1D 
+0 <= lane <= 0

Results

Rd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(idxdsize) operand = V[n];
+
+X[d] = ZeroExtend(Elem[operand, index, esize], datasize);
+

Supported architectures

v7/A32/A64

Description

Unsigned Move vector element to general-purpose register. This instruction reads the unsigned integer from the source SIMD&FP register, zero-extends it to form a 32-bit or 64-bit value, and writes the result to the destination general-purpose register.

+

A64 Instruction

UMOV Rd,Vn.D[lane]
+

Argument Preparation

v → Vn.1D 
+0 <= lane <= 0

Results

Rd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(idxdsize) operand = V[n];
+
+X[d] = ZeroExtend(Elem[operand, index, esize], datasize);
+

Supported architectures

A32/A64

Description

Signed Move vector element to general-purpose register. This instruction reads the signed integer from the source SIMD&FP register, sign-extends it to form a 32-bit or 64-bit value, and writes the result to the destination general-purpose register.

+

A64 Instruction

SMOV Rd,Vn.B[lane]
+

Argument Preparation

v → Vn.8B 
+0 <= lane <= 7

Results

Rd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(idxdsize) operand = V[n];
+
+X[d] = SignExtend(Elem[operand, index, esize], datasize);
+

Supported architectures

v7/A32/A64
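
The only difference from UMOV is the sign-extension step; a matching plain-Rust sketch of the byte form (names again invented for the example):

// Model of SMOV Rd, Vn.B[lane]: sign-extend byte `lane` of an 8-byte vector.
fn smov_b(v: [i8; 8], lane: usize) -> i64 {
    i64::from(v[lane]) // SignExtend(Elem[operand, index, 8], 64)
}

fn main() {
    let v = [-128i8, 1, 2, 3, 4, 5, 6, 7];
    assert_eq!(smov_b(v, 0), -128); // byte 0x80 becomes 0xFFFF_FFFF_FFFF_FF80
}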

Description

Signed Move vector element to general-purpose register. This instruction reads the signed integer from the source SIMD&FP register, sign-extends it to form a 32-bit or 64-bit value, and writes the result to the destination general-purpose register.

+

A64 Instruction

SMOV Rd,Vn.H[lane]
+

Argument Preparation

v → Vn.4H 
+0 <= lane <= 3

Results

Rd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(idxdsize) operand = V[n];
+
+X[d] = SignExtend(Elem[operand, index, esize], datasize);
+

Supported architectures

v7/A32/A64

Description

Signed Move vector element to general-purpose register. This instruction reads the signed integer from the source SIMD&FP register, sign-extends it to form a 32-bit or 64-bit value, and writes the result to the destination general-purpose register.

+

A64 Instruction

SMOV Rd,Vn.S[lane]
+

Argument Preparation

v → Vn.2S 
+0 <= lane <= 1

Results

Rd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(idxdsize) operand = V[n];
+
+X[d] = SignExtend(Elem[operand, index, esize], datasize);
+

Supported architectures

v7/A32/A64

Description

Unsigned Move vector element to general-purpose register. This instruction reads the unsigned integer from the source SIMD&FP register, zero-extends it to form a 32-bit or 64-bit value, and writes the result to the destination general-purpose register.

+

A64 Instruction

UMOV Rd,Vn.D[lane]
+

Argument Preparation

v → Vn.1D 
+0 <= lane <= 0

Results

Rd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(idxdsize) operand = V[n];
+
+X[d] = ZeroExtend(Elem[operand, index, esize], datasize);
+

Supported architectures

v7/A32/A64

Description

Unsigned Move vector element to general-purpose register. This instruction reads the unsigned integer from the source SIMD&FP register, zero-extends it to form a 32-bit or 64-bit value, and writes the result to the destination general-purpose register.

+

A64 Instruction

UMOV Rd,Vn.B[lane]
+

Argument Preparation

v → Vn.8B 
+0 <= lane <= 7

Results

Rd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(idxdsize) operand = V[n];
+
+X[d] = ZeroExtend(Elem[operand, index, esize], datasize);
+

Supported architectures

v7/A32/A64

Description

Unsigned Move vector element to general-purpose register. This instruction reads the unsigned integer from the source SIMD&FP register, zero-extends it to form a 32-bit or 64-bit value, and writes the result to the destination general-purpose register.

+

A64 Instruction

UMOV Rd,Vn.H[lane]
+

Argument Preparation

v → Vn.4H 
+0 <= lane <= 3

Results

Rd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(idxdsize) operand = V[n];
+
+X[d] = ZeroExtend(Elem[operand, index, esize], datasize);
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Sd,Vn.S[lane]
+

Argument Preparation

v → Vn.2S 
+0 <= lane <= 1

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64
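
The Operation block above is a simple broadcast; a plain-Rust sketch of that loop, with the source value and lane count as parameters (the generic dup_f32 helper is made up for the example):

// Model of the DUP loop: copy one element into every lane of the result.
fn dup_f32<const N: usize>(element: f32) -> [f32; N] {
    [element; N]
}

fn main() {
    assert_eq!(dup_f32::<2>(1.5), [1.5, 1.5]); // e.g. a two-lane .2S vector
}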

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Dd,Vn.D[lane]
+

Argument Preparation

v → Vn.1D 
+0 <= lane <= 0

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Unsigned Move vector element to general-purpose register. This instruction reads the unsigned integer from the source SIMD&FP register, zero-extends it to form a 32-bit or 64-bit value, and writes the result to the destination general-purpose register.

+

A64 Instruction

UMOV Rd,Vn.B[lane]
+

Argument Preparation

v → Vn.16B 
+0 <= lane <= 15

Results

Rd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(idxdsize) operand = V[n];
+
+X[d] = ZeroExtend(Elem[operand, index, esize], datasize);
+

Supported architectures

v7/A32/A64

Description

Unsigned Move vector element to general-purpose register. This instruction reads the unsigned integer from the source SIMD&FP register, zero-extends it to form a 32-bit or 64-bit value, and writes the result to the destination general-purpose register.

+

A64 Instruction

UMOV Rd,Vn.H[lane]
+

Argument Preparation

v → Vn.8H 
+0 <= lane <= 7

Results

Rd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(idxdsize) operand = V[n];
+
+X[d] = ZeroExtend(Elem[operand, index, esize], datasize);
+

Supported architectures

v7/A32/A64

Description

Unsigned Move vector element to general-purpose register. This instruction reads the unsigned integer from the source SIMD&FP register, zero-extends it to form a 32-bit or 64-bit value, and writes the result to the destination general-purpose register.

+

A64 Instruction

UMOV Rd,Vn.S[lane]
+

Argument Preparation

v → Vn.4S 
+0 <= lane <= 3

Results

Rd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(idxdsize) operand = V[n];
+
+X[d] = ZeroExtend(Elem[operand, index, esize], datasize);
+

Supported architectures

v7/A32/A64

Description

Unsigned Move vector element to general-purpose register. This instruction reads the unsigned integer from the source SIMD&FP register, zero-extends it to form a 32-bit or 64-bit value, and writes the result to the destination general-purpose register.

+

A64 Instruction

UMOV Rd,Vn.D[lane]
+

Argument Preparation

v → Vn.2D 
+0 <= lane <= 1

Results

Rd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(idxdsize) operand = V[n];
+
+X[d] = ZeroExtend(Elem[operand, index, esize], datasize);
+

Supported architectures

v7/A32/A64

Description

Unsigned Move vector element to general-purpose register. This instruction reads the unsigned integer from the source SIMD&FP register, zero-extends it to form a 32-bit or 64-bit value, and writes the result to the destination general-purpose register.

+

A64 Instruction

UMOV Rd,Vn.D[lane]
+

Argument Preparation

v → Vn.2D 
+0 <= lane <= 1

Results

Rd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(idxdsize) operand = V[n];
+
+X[d] = ZeroExtend(Elem[operand, index, esize], datasize);
+

Supported architectures

A32/A64

Description

Signed Move vector element to general-purpose register. This instruction reads the signed integer from the source SIMD&FP register, sign-extends it to form a 32-bit or 64-bit value, and writes the result to the destination general-purpose register.

+

A64 Instruction

SMOV Rd,Vn.B[lane]
+

Argument Preparation

v → Vn.16B 
+0 <= lane <= 15

Results

Rd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(idxdsize) operand = V[n];
+
+X[d] = SignExtend(Elem[operand, index, esize], datasize);
+

Supported architectures

v7/A32/A64

Description

Signed Move vector element to general-purpose register. This instruction reads the signed integer from the source SIMD&FP register, sign-extends it to form a 32-bit or 64-bit value, and writes the result to the destination general-purpose register.

+

A64 Instruction

SMOV Rd,Vn.H[lane]
+

Argument Preparation

v → Vn.8H 
+0 <= lane <= 7

Results

Rd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(idxdsize) operand = V[n];
+
+X[d] = SignExtend(Elem[operand, index, esize], datasize);
+

Supported architectures

v7/A32/A64

Description

Signed Move vector element to general-purpose register. This instruction reads the signed integer from the source SIMD&FP register, sign-extends it to form a 32-bit or 64-bit value, and writes the result to the destination general-purpose register.

+

A64 Instruction

SMOV Rd,Vn.S[lane]
+

Argument Preparation

v → Vn.4S 
+0 <= lane <= 3

Results

Rd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(idxdsize) operand = V[n];
+
+X[d] = SignExtend(Elem[operand, index, esize], datasize);
+

Supported architectures

v7/A32/A64

Description

Unsigned Move vector element to general-purpose register. This instruction reads the unsigned integer from the source SIMD&FP register, zero-extends it to form a 32-bit or 64-bit value, and writes the result to the destination general-purpose register.

+

A64 Instruction

UMOV Rd,Vn.D[lane]
+

Argument Preparation

v → Vn.2D 
+0 <= lane <= 1

Results

Rd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(idxdsize) operand = V[n];
+
+X[d] = ZeroExtend(Elem[operand, index, esize], datasize);
+

Supported architectures

v7/A32/A64

Description

Unsigned Move vector element to general-purpose register. This instruction reads the unsigned integer from the source SIMD&FP register, zero-extends it to form a 32-bit or 64-bit value, and writes the result to the destination general-purpose register.

+

A64 Instruction

UMOV Rd,Vn.B[lane]
+

Argument Preparation

v → Vn.16B 
+0 <= lane <= 15

Results

Rd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(idxdsize) operand = V[n];
+
+X[d] = ZeroExtend(Elem[operand, index, esize], datasize);
+

Supported architectures

v7/A32/A64

Description

Unsigned Move vector element to general-purpose register. This instruction reads the unsigned integer from the source SIMD&FP register, zero-extends it to form a 32-bit or 64-bit value, and writes the result to the destination general-purpose register.

+

A64 Instruction

UMOV Rd,Vn.H[lane]
+

Argument Preparation

v → Vn.8H 
+0 <= lane <= 7

Results

Rd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(idxdsize) operand = V[n];
+
+X[d] = ZeroExtend(Elem[operand, index, esize], datasize);
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Hd,Vn.H[lane]
+

Argument Preparation

v → Vn.4H 
+0 <= lane <= 3

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Hd,Vn.H[lane]
+

Argument Preparation

v → Vn.8H 
+0 <= lane <= 7

Results

Hd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Sd,Vn.S[lane]
+

Argument Preparation

v → Vn.4S 
+0 <= lane <= 3

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

DUP Dd,Vn.D[lane]
+

Argument Preparation

v → Vn.2D 
+0 <= lane <= 1

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(datasize) result;
+
+for e = 0 to elements-1
+    Elem[result, e, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.B[lane],Rn
+

Argument Preparation

a → Rn 
+v → Vd.8B
+0 <= lane <= 7

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64
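
A plain-Rust sketch of the Operation block above: the destination is read, a single lane is overwritten, and the whole vector is written back (the helper name ins_b is invented for the example):

// Model of INS Vd.B[lane], Rn: overwrite one lane, keep the rest of Vd.
fn ins_b(dest: [u8; 8], lane: usize, value: u8) -> [u8; 8] {
    let mut result = dest;  // result = V[d]
    result[lane] = value;   // Elem[result, index, esize] = element
    result
}

fn main() {
    assert_eq!(ins_b([0; 8], 3, 0xFF), [0, 0, 0, 0xFF, 0, 0, 0, 0]);
}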

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.H[lane],Rn
+

Argument Preparation

a → Rn 
+v → Vd.4H
+0 <= lane <= 3

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.S[lane],Rn
+

Argument Preparation

a → Rn 
+v → Vd.2S
+0 <= lane <= 1

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.D[lane],Rn
+

Argument Preparation

a → Rn 
+v → Vd.1D
+0 <= lane <= 0

Results

Vd.1D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.D[lane],Rn
+

Argument Preparation

a → Rn 
+v → Vd.1D
+0 <= lane <= 0

Results

Vd.1D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.B[lane],Rn
+

Argument Preparation

a → Rn 
+v → Vd.8B
+0 <= lane <= 7

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.H[lane],Rn
+

Argument Preparation

a → Rn 
+v → Vd.4H
+0 <= lane <= 3

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.S[lane],Rn
+

Argument Preparation

a → Rn 
+v → Vd.2S
+0 <= lane <= 1

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.D[lane],Rn
+

Argument Preparation

a → Rn 
+v → Vd.1D
+0 <= lane <= 0

Results

Vd.1D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.B[lane],Rn
+

Argument Preparation

a → Rn 
+v → Vd.8B
+0 <= lane <= 7

Results

Vd.8B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.H[lane],Rn
+

Argument Preparation

a → Rn 
+v → Vd.4H
+0 <= lane <= 3

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.H[lane],Vn.H[0]
+

Argument Preparation

a → VnH 
+v → Vd.4H
+0 <= lane <= 3

Results

Vd.4H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.H[lane],Vn.H[0]
+

Argument Preparation

a → VnH 
+v → Vd.8H
+0 <= lane <= 7

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.S[lane],Rn
+

Argument Preparation

a → Rn 
+v → Vd.2S
+0 <= lane <= 1

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.D[lane],Rn
+

Argument Preparation

a → Rn 
+v → Vd.1D
+0 <= lane <= 0

Results

Vd.1D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.B[lane],Rn
+

Argument Preparation

a → Rn 
+v → Vd.16B
+0 <= lane <= 15

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.H[lane],Rn
+

Argument Preparation

a → Rn 
+v → Vd.8H
+0 <= lane <= 7

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.S[lane],Rn
+

Argument Preparation

a → Rn 
+v → Vd.4S
+0 <= lane <= 3

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.D[lane],Rn
+

Argument Preparation

a → Rn 
+v → Vd.2D
+0 <= lane <= 1

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.D[lane],Rn
+

Argument Preparation

a → Rn 
+v → Vd.2D
+0 <= lane <= 1

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.B[lane],Rn
+

Argument Preparation

a → Rn 
+v → Vd.16B
+0 <= lane <= 15

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.H[lane],Rn
+

Argument Preparation

a → Rn 
+v → Vd.8H
+0 <= lane <= 7

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.S[lane],Rn
+

Argument Preparation

a → Rn 
+v → Vd.4S
+0 <= lane <= 3

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.D[lane],Rn
+

Argument Preparation

a → Rn 
+v → Vd.2D
+0 <= lane <= 1

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.B[lane],Rn
+

Argument Preparation

a → Rn 
+v → Vd.16B
+0 <= lane <= 15

Results

Vd.16B → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.H[lane],Rn
+

Argument Preparation

a → Rn 
+v → Vd.8H
+0 <= lane <= 7

Results

Vd.8H → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.S[lane],Rn
+

Argument Preparation

a → Rn 
+v → Vd.4S
+0 <= lane <= 3

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

+

A64 Instruction

INS Vd.D[lane],Rn
+

Argument Preparation

a → Rn 
+v → Vd.2D
+0 <= lane <= 1

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(esize) element = X[n];
+bits(128) result;
+
+result = V[d];
+Elem[result, index, esize] = element;
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Reciprocal exponent (scalar). This instruction finds an approximate reciprocal exponent for each vector element in the source SIMD&FP register, places the result in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FRECPX Sd,Sn
+

Argument Preparation

a → Sn 

Results

Sd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPRecpX(element, FPCR);
+
+V[d] = result;
+

Supported architectures

A64
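
For normal (non-NaN, non-zero-exponent) inputs, FPRecpX keeps the sign, complements the exponent bits and clears the fraction; the sketch below models only that common case and deliberately omits the NaN and zero-exponent paths, so treat it as an approximation of the helper rather than a full implementation:

// Rough model of FRECPX for a normal f32 input: keep the sign, invert the
// exponent field, zero the fraction. Special-case handling is omitted.
fn frecpx_f32_normal(x: f32) -> f32 {
    let bits = x.to_bits();
    let sign = bits & 0x8000_0000;
    let exp = (bits >> 23) & 0xFF;
    f32::from_bits(sign | ((!exp & 0xFF) << 23))
}

fn main() {
    // 2.0 has exponent field 128; complementing gives 127, i.e. the value 1.0.
    assert_eq!(frecpx_f32_normal(2.0), 1.0);
}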

Description

Floating-point Reciprocal exponent (scalar). This instruction finds an approximate reciprocal exponent for each vector element in the source SIMD&FP register, places the result in a vector, and writes the vector to the destination SIMD&FP register.

+

A64 Instruction

FRECPX Dd,Dn
+

Argument Preparation

a → Dn 

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand = V[n];
+bits(datasize) result;
+bits(esize) element;
+
+for e = 0 to elements-1
+    element = Elem[operand, e, esize];
+    Elem[result, e, esize] = FPRecpX(element, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point fused Multiply-Add to accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, adds the product to the corresponding vector element of the destination SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

FMLA Vd.2S,Vn.2S,Vm.S[0]
+

Argument Preparation

a → Vd.2S 
+b → Vn.2S
+n → Vm.S[0]

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then element1 = FPNeg(element1);
+    Elem[result, e, esize] = FPMulAdd(Elem[operand3, e, esize], element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
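
The accumulation loop in the Operation block above maps to one fused multiply-add per lane (for the Vm.S[0] form, element2 is the single selected Vm lane for every element). A plain-Rust sketch for two lanes, using f32::mul_add for the single-rounding FPMulAdd step (the helper name fmla is made up for the example):

// Model of the FMLA loop: per lane, acc[e] + a[e] * b[e] with one rounding.
fn fmla(acc: [f32; 2], a: [f32; 2], b: [f32; 2]) -> [f32; 2] {
    [a[0].mul_add(b[0], acc[0]), a[1].mul_add(b[1], acc[1])]
}

fn main() {
    assert_eq!(fmla([1.0, 2.0], [3.0, 4.0], [5.0, 6.0]), [16.0, 26.0]);
}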

Description

Floating-point fused Multiply-Add to accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, adds the product to the corresponding vector element of the destination SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

FMLA Vd.4S,Vn.4S,Vm.S[0]
+

Argument Preparation

a → Vd.4S 
+b → Vn.4S
+n → Vm.S[0]

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then element1 = FPNeg(element1);
+    Elem[result, e, esize] = FPMulAdd(Elem[operand3, e, esize], element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Floating-point fused Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, negates the product, adds the result to the corresponding vector element of the destination SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

FMLS Vd.2S,Vn.2S,Vm.S[0]
+

Argument Preparation

a → Vd.2S 
+b → Vn.2S
+n → Vm.S[0]

Results

Vd.2S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then element1 = FPNeg(element1);
+    Elem[result, e, esize] = FPMulAdd(Elem[operand3, e, esize], element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64
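
FMLS follows the same loop with the product negated (the sub_op path in the pseudocode); a matching sketch (names invented for the example):

// Model of the FMLS loop: per lane, acc[e] - a[e] * b[e] with one rounding.
fn fmls(acc: [f32; 2], a: [f32; 2], b: [f32; 2]) -> [f32; 2] {
    [(-a[0]).mul_add(b[0], acc[0]), (-a[1]).mul_add(b[1], acc[1])]
}

fn main() {
    assert_eq!(fmls([16.0, 26.0], [3.0, 4.0], [5.0, 6.0]), [1.0, 2.0]);
}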

Description

Floating-point fused Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, negates the product, adds the result to the corresponding vector element of the destination SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

FMLS Vd.4S,Vn.4S,Vm.S[0]
+

Argument Preparation

a → Vd.4S 
+b → Vn.4S
+n → Vm.S[0]

Results

Vd.4S → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then element1 = FPNeg(element1);
+    Elem[result, e, esize] = FPMulAdd(Elem[operand3, e, esize], element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point fused Multiply-Add (scalar). This instruction multiplies the values of the first two SIMD&FP source registers, adds the product to the value of the third SIMD&FP source register, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FMADD Dd,Dn,Dm,Da
+

Argument Preparation

a → Da 
+b → Dn
+n → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) result;
+bits(datasize) operanda = V[a];
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+
+result = FPMulAdd(operanda, operand1, operand2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64
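
With the argument mapping above (a → Da, b → Dn, n → Dm), the scalar operation is Da + Dn * Dm with a single rounding; a one-line plain-Rust sketch using f64::mul_add (the helper name fmadd is an assumption for the example):

// Model of FMADD Dd, Dn, Dm, Da: one fused multiply-add, Da + Dn * Dm.
fn fmadd(a: f64, b: f64, n: f64) -> f64 {
    b.mul_add(n, a) // (b * n) + a with a single rounding
}

fn main() {
    assert_eq!(fmadd(1.0, 2.0, 3.0), 7.0);
}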

Description

Floating-point fused Multiply-Add to accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, adds the product to the corresponding vector element of the destination SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

FMLA Vd.2D,Vn.2D,Vm.D[0]
+

Argument Preparation

a → Vd.2D 
+b → Vn.2D
+n → Vm.D[0]

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then element1 = FPNeg(element1);
+    Elem[result, e, esize] = FPMulAdd(Elem[operand3, e, esize], element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Floating-point Fused Multiply-Subtract (scalar). This instruction multiplies the values of the first two SIMD&FP source registers, negates the product, adds that to the value of the third SIMD&FP source register, and writes the result to the SIMD&FP destination register.

+

A64 Instruction

FMSUB Dd,Dn,Dm,Da
+

Argument Preparation

a → Da 
+b → Dn
+n → Dm

Results

Dd → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) result;
+bits(datasize) operanda = V[a];
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+
+operand1 = FPNeg(operand1);
+result = FPMulAdd(operanda, operand1, operand2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64
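
FMSUB negates the first multiplicand before the fused multiply-add, giving Da - Dn * Dm; a matching sketch (helper name invented for the example):

// Model of FMSUB Dd, Dn, Dm, Da: Da - Dn * Dm with a single rounding.
fn fmsub(a: f64, b: f64, n: f64) -> f64 {
    (-b).mul_add(n, a)
}

fn main() {
    assert_eq!(fmsub(7.0, 2.0, 3.0), 1.0);
}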

Description

Floating-point fused Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, negates the product, adds the result to the corresponding vector element of the destination SIMD&FP register, and writes the result to the destination SIMD&FP register.

+

A64 Instruction

FMLS Vd.2D,Vn.2D,Vm.D[0]
+

Argument Preparation

a → Vd.2D 
+b → Vn.2D
+n → Vm.D[0]

Results

Vd.2D → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) operand3 = V[d];
+bits(datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    if sub_op then element1 = FPNeg(element1);
+    Elem[result, e, esize] = FPMulAdd(Elem[operand3, e, esize], element1, element2, FPCR);
+
+V[d] = result;
+

Supported architectures

A64

Description

Transpose vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.

+

A64 Instruction

TRN1 Vd1.8B,Vn.8B,Vm.8B
+TRN2 Vd2.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd1.8B → result.val[0]
+Vd2.8B → result.val[1]
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, 2*p+part, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, 2*p+part, esize];
+
+V[d] = result;
+

Supported architectures

v7/A32/A64
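
The TRN1/TRN2 pair above differs only in the `part` value (0 selects even-numbered source lanes, 1 selects odd-numbered ones); a plain-Rust sketch of the pairing loop for eight one-byte lanes (the helper name trn is made up for the example):

// Model of the TRN1/TRN2 loop: interleave the selected lanes of a and b.
fn trn(a: [u8; 8], b: [u8; 8], part: usize) -> [u8; 8] {
    let mut result = [0u8; 8];
    for p in 0..4 {
        result[2 * p] = a[2 * p + part];
        result[2 * p + 1] = b[2 * p + part];
    }
    result
}

fn main() {
    let a = [0, 1, 2, 3, 4, 5, 6, 7];
    let b = [10, 11, 12, 13, 14, 15, 16, 17];
    assert_eq!(trn(a, b, 0), [0, 10, 2, 12, 4, 14, 6, 16]); // TRN1
    assert_eq!(trn(a, b, 1), [1, 11, 3, 13, 5, 15, 7, 17]); // TRN2
}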

Description

Transpose vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.

+

A64 Instruction

TRN1 Vd1.4H,Vn.4H,Vm.4H
+TRN2 Vd2.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd1.4H → result.val[0]
+Vd2.4H → result.val[1]
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, 2*p+part, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, 2*p+part, esize];
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Transpose vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.

+

A64 Instruction

TRN1 Vd1.8B,Vn.8B,Vm.8B
+TRN2 Vd2.8B,Vn.8B,Vm.8B
+

Argument Preparation

a → Vn.8B 
+b → Vm.8B

Results

Vd1.8B → result.val[0]
+Vd2.8B → result.val[1]
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, 2*p+part, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, 2*p+part, esize];
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Transpose vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.

+

A64 Instruction

TRN1 Vd1.4H,Vn.4H,Vm.4H
+TRN2 Vd2.4H,Vn.4H,Vm.4H
+

Argument Preparation

a → Vn.4H 
+b → Vm.4H

Results

Vd1.4H → result.val[0]
+Vd2.4H → result.val[1]
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = V[n];
+bits(datasize) operand2 = V[m];
+bits(datasize) result;
+
+for p = 0 to pairs-1
+    Elem[result, 2*p+0, esize] = Elem[operand1, 2*p+part, esize];
+    Elem[result, 2*p+1, esize] = Elem[operand2, 2*p+part, esize];
+
+V[d] = result;
+

Supported architectures

v7/A32/A64

Description

Transpose vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.

The entries in this group differ only in the register arrangement; the description, the operation pseudocode and the supported architectures are common to all of them and are given once below.

A64 Instructions (one TRN1/TRN2 pair per entry, in the original order)

TRN1 Vd1.8B,Vn.8B,Vm.8B    / TRN2 Vd2.8B,Vn.8B,Vm.8B
TRN1 Vd1.4H,Vn.4H,Vm.4H    / TRN2 Vd2.4H,Vn.4H,Vm.4H
TRN1 Vd1.2S,Vn.2S,Vm.2S    / TRN2 Vd2.2S,Vn.2S,Vm.2S
TRN1 Vd1.2S,Vn.2S,Vm.2S    / TRN2 Vd2.2S,Vn.2S,Vm.2S
TRN1 Vd1.2S,Vn.2S,Vm.2S    / TRN2 Vd2.2S,Vn.2S,Vm.2S
TRN1 Vd1.16B,Vn.16B,Vm.16B / TRN2 Vd2.16B,Vn.16B,Vm.16B
TRN1 Vd1.8H,Vn.8H,Vm.8H    / TRN2 Vd2.8H,Vn.8H,Vm.8H
TRN1 Vd1.4S,Vn.4S,Vm.4S    / TRN2 Vd2.4S,Vn.4S,Vm.4S
TRN1 Vd1.4S,Vn.4S,Vm.4S    / TRN2 Vd2.4S,Vn.4S,Vm.4S
TRN1 Vd1.16B,Vn.16B,Vm.16B / TRN2 Vd2.16B,Vn.16B,Vm.16B
TRN1 Vd1.8H,Vn.8H,Vm.8H    / TRN2 Vd2.8H,Vn.8H,Vm.8H
TRN1 Vd1.4S,Vn.4S,Vm.4S    / TRN2 Vd2.4S,Vn.4S,Vm.4S
TRN1 Vd1.16B,Vn.16B,Vm.16B / TRN2 Vd2.16B,Vn.16B,Vm.16B
TRN1 Vd1.8H,Vn.8H,Vm.8H    / TRN2 Vd2.8H,Vn.8H,Vm.8H

Argument Preparation (per entry, using that entry's arrangement)

a → Vn.<arrangement>
b → Vm.<arrangement>

Results

Vd1.<arrangement> → result.val[0]
Vd2.<arrangement> → result.val[1]

Operation

CheckFPAdvSIMDEnabled64();
bits(datasize) operand1 = V[n];
bits(datasize) operand2 = V[m];
bits(datasize) result;

for p = 0 to pairs-1
    Elem[result, 2*p+0, esize] = Elem[operand1, 2*p+part, esize];
    Elem[result, 2*p+1, esize] = Elem[operand2, 2*p+part, esize];

V[d] = result;

Supported architectures

v7/A32/A64 (all entries)
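
The Operation pseudocode is the same for every entry; only the element size (esize) and the number of element pairs (pairs) change with the arrangement. As a minimal sketch, the plain Rust below models the TRN1/TRN2 element movement for the 8B arrangement (eight 8-bit lanes). The helper name trn and the use of ordinary arrays are illustrative only and are not part of stdarch; part plays the same role as in the pseudocode (0 selects TRN1, 1 selects TRN2).

// Illustrative model of the TRN1/TRN2 element movement described above,
// for the 8B arrangement. `part` is 0 for TRN1 and 1 for TRN2.
fn trn(a: [u8; 8], b: [u8; 8], part: usize) -> [u8; 8] {
    let mut result = [0u8; 8];
    let pairs = a.len() / 2;
    for p in 0..pairs {
        result[2 * p] = a[2 * p + part];     // even destination elements come from the first source
        result[2 * p + 1] = b[2 * p + part]; // odd destination elements come from the second source
    }
    result
}

fn main() {
    let a = [0, 1, 2, 3, 4, 5, 6, 7];
    let b = [10, 11, 12, 13, 14, 15, 16, 17];
    assert_eq!(trn(a, b, 0), [0, 10, 2, 12, 4, 14, 6, 16]); // TRN1: even-numbered source elements
    assert_eq!(trn(a, b, 1), [1, 11, 3, 13, 5, 15, 7, 17]); // TRN2: odd-numbered source elements
    println!("trn ok");
}

Calling trn with part = 0 and part = 1 on the same pair of vectors yields the two halves listed under Results, i.e. result.val[0] and result.val[1].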

Description

Zip vectors (secondary). This instruction reads adjacent vector elements from the lower half of two source SIMD&FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.

The entries in this group differ only in the register arrangement; the description, the operation pseudocode and the supported architectures are common to all of them and are given once below.

A64 Instructions (one ZIP1/ZIP2 pair per entry, in the original order)

ZIP1 Vd1.8B,Vn.8B,Vm.8B    / ZIP2 Vd2.8B,Vn.8B,Vm.8B
ZIP1 Vd1.4H,Vn.4H,Vm.4H    / ZIP2 Vd2.4H,Vn.4H,Vm.4H
ZIP1 Vd1.8B,Vn.8B,Vm.8B    / ZIP2 Vd2.8B,Vn.8B,Vm.8B
ZIP1 Vd1.4H,Vn.4H,Vm.4H    / ZIP2 Vd2.4H,Vn.4H,Vm.4H
ZIP1 Vd1.8B,Vn.8B,Vm.8B    / ZIP2 Vd2.8B,Vn.8B,Vm.8B
ZIP1 Vd1.4H,Vn.4H,Vm.4H    / ZIP2 Vd2.4H,Vn.4H,Vm.4H
ZIP1 Vd1.2S,Vn.2S,Vm.2S    / ZIP2 Vd2.2S,Vn.2S,Vm.2S
ZIP1 Vd1.2S,Vn.2S,Vm.2S    / ZIP2 Vd2.2S,Vn.2S,Vm.2S
ZIP1 Vd1.2S,Vn.2S,Vm.2S    / ZIP2 Vd2.2S,Vn.2S,Vm.2S
ZIP1 Vd1.16B,Vn.16B,Vm.16B / ZIP2 Vd2.16B,Vn.16B,Vm.16B
ZIP1 Vd1.8H,Vn.8H,Vm.8H    / ZIP2 Vd2.8H,Vn.8H,Vm.8H
ZIP1 Vd1.4S,Vn.4S,Vm.4S    / ZIP2 Vd2.4S,Vn.4S,Vm.4S
ZIP1 Vd1.4S,Vn.4S,Vm.4S    / ZIP2 Vd2.4S,Vn.4S,Vm.4S
ZIP1 Vd1.16B,Vn.16B,Vm.16B / ZIP2 Vd2.16B,Vn.16B,Vm.16B
ZIP1 Vd1.8H,Vn.8H,Vm.8H    / ZIP2 Vd2.8H,Vn.8H,Vm.8H
ZIP1 Vd1.4S,Vn.4S,Vm.4S    / ZIP2 Vd2.4S,Vn.4S,Vm.4S
ZIP1 Vd1.16B,Vn.16B,Vm.16B / ZIP2 Vd2.16B,Vn.16B,Vm.16B
ZIP1 Vd1.8H,Vn.8H,Vm.8H    / ZIP2 Vd2.8H,Vn.8H,Vm.8H

Argument Preparation (per entry, using that entry's arrangement)

a → Vn.<arrangement>
b → Vm.<arrangement>

Results

Vd1.<arrangement> → result.val[0]
Vd2.<arrangement> → result.val[1]

Operation

CheckFPAdvSIMDEnabled64();
bits(datasize) operand1 = V[n];
bits(datasize) operand2 = V[m];
bits(datasize) result;

integer base = part * pairs;

for p = 0 to pairs-1
    Elem[result, 2*p+0, esize] = Elem[operand1, base+p, esize];
    Elem[result, 2*p+1, esize] = Elem[operand2, base+p, esize];

V[d] = result;

Supported architectures

v7/A32/A64 (all entries)
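
As with the transpose group, the Operation pseudocode is shared by every entry. The plain Rust below is a minimal sketch of the ZIP1/ZIP2 element movement for the 8B arrangement; the helper name zip and the use of ordinary arrays are illustrative only and are not part of stdarch. As in the pseudocode, part selects which halves of the sources are interleaved (0 for ZIP1, 1 for ZIP2).

// Illustrative model of the ZIP1/ZIP2 element movement described above,
// for the 8B arrangement. `base = part * pairs` picks the lower or upper half.
fn zip(a: [u8; 8], b: [u8; 8], part: usize) -> [u8; 8] {
    let mut result = [0u8; 8];
    let pairs = a.len() / 2;
    let base = part * pairs;
    for p in 0..pairs {
        result[2 * p] = a[base + p];     // element from the first source
        result[2 * p + 1] = b[base + p]; // interleaved with the second source
    }
    result
}

fn main() {
    let a = [0, 1, 2, 3, 4, 5, 6, 7];
    let b = [10, 11, 12, 13, 14, 15, 16, 17];
    assert_eq!(zip(a, b, 0), [0, 10, 1, 11, 2, 12, 3, 13]); // ZIP1 interleaves the lower halves
    assert_eq!(zip(a, b, 1), [4, 14, 5, 15, 6, 16, 7, 17]); // ZIP2 interleaves the upper halves
    println!("zip ok");
}

Running zip with part = 0 and part = 1 produces result.val[0] and result.val[1] from the Results section, i.e. the full interleaving of the two source vectors split across two destination registers.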

Description

Unzip vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&FP register.

The entries in this group differ only in the register arrangement; the description, the operation pseudocode and the supported architectures are common to all of them and are given once below.

A64 Instructions (one UZP1/UZP2 pair per entry, in the original order)

UZP1 Vd1.8B,Vn.8B,Vm.8B    / UZP2 Vd2.8B,Vn.8B,Vm.8B
UZP1 Vd1.4H,Vn.4H,Vm.4H    / UZP2 Vd2.4H,Vn.4H,Vm.4H
UZP1 Vd1.2S,Vn.2S,Vm.2S    / UZP2 Vd2.2S,Vn.2S,Vm.2S
UZP1 Vd1.2S,Vn.2S,Vm.2S    / UZP2 Vd2.2S,Vn.2S,Vm.2S
UZP1 Vd1.8B,Vn.8B,Vm.8B    / UZP2 Vd2.8B,Vn.8B,Vm.8B
UZP1 Vd1.4H,Vn.4H,Vm.4H    / UZP2 Vd2.4H,Vn.4H,Vm.4H
UZP1 Vd1.2S,Vn.2S,Vm.2S    / UZP2 Vd2.2S,Vn.2S,Vm.2S
UZP1 Vd1.8B,Vn.8B,Vm.8B    / UZP2 Vd2.8B,Vn.8B,Vm.8B
UZP1 Vd1.4H,Vn.4H,Vm.4H    / UZP2 Vd2.4H,Vn.4H,Vm.4H
UZP1 Vd1.16B,Vn.16B,Vm.16B / UZP2 Vd2.16B,Vn.16B,Vm.16B
UZP1 Vd1.8H,Vn.8H,Vm.8H    / UZP2 Vd2.8H,Vn.8H,Vm.8H
UZP1 Vd1.4S,Vn.4S,Vm.4S    / UZP2 Vd2.4S,Vn.4S,Vm.4S
UZP1 Vd1.4S,Vn.4S,Vm.4S    / UZP2 Vd2.4S,Vn.4S,Vm.4S
UZP1 Vd1.16B,Vn.16B,Vm.16B / UZP2 Vd2.16B,Vn.16B,Vm.16B
UZP1 Vd1.8H,Vn.8H,Vm.8H    / UZP2 Vd2.8H,Vn.8H,Vm.8H
UZP1 Vd1.4S,Vn.4S,Vm.4S    / UZP2 Vd2.4S,Vn.4S,Vm.4S
UZP1 Vd1.16B,Vn.16B,Vm.16B / UZP2 Vd2.16B,Vn.16B,Vm.16B
UZP1 Vd1.8H,Vn.8H,Vm.8H    / UZP2 Vd2.8H,Vn.8H,Vm.8H

Argument Preparation (per entry, using that entry's arrangement)

a → Vn.<arrangement>
b → Vm.<arrangement>

Results

Vd1.<arrangement> → result.val[0]
Vd2.<arrangement> → result.val[1]

Operation

CheckFPAdvSIMDEnabled64();
bits(datasize) operandl = V[n];
bits(datasize) operandh = V[m];
bits(datasize) result;

bits(datasize*2) zipped = operandh:operandl;
for e = 0 to elements-1
    Elem[result, e, esize] = Elem[zipped, 2*e+part, esize];

V[d] = result;

Supported architectures

v7/A32/A64 (all entries)
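
The Operation pseudocode concatenates the second source on top of the first (zipped = operandh:operandl) and then takes every second element. The plain Rust below is a minimal sketch of that selection for the 8B arrangement; the helper name uzp and the use of ordinary arrays are illustrative only and are not part of stdarch, with part set to 0 for UZP1 (even-numbered elements) and 1 for UZP2 (odd-numbered elements).

// Illustrative model of the UZP1/UZP2 element selection described above,
// for the 8B arrangement.
fn uzp(a: [u8; 8], b: [u8; 8], part: usize) -> [u8; 8] {
    // zipped = operandh:operandl, i.e. the second source concatenated above the first.
    let mut zipped = [0u8; 16];
    zipped[..8].copy_from_slice(&a);
    zipped[8..].copy_from_slice(&b);
    let mut result = [0u8; 8];
    for e in 0..result.len() {
        result[e] = zipped[2 * e + part]; // keep every second element of the concatenation
    }
    result
}

fn main() {
    let a = [0, 1, 2, 3, 4, 5, 6, 7];
    let b = [10, 11, 12, 13, 14, 15, 16, 17];
    assert_eq!(uzp(a, b, 0), [0, 2, 4, 6, 10, 12, 14, 16]); // UZP1: even-numbered elements
    assert_eq!(uzp(a, b, 1), [1, 3, 5, 7, 11, 13, 15, 17]); // UZP2: odd-numbered elements
    println!("uzp ok");
}

The two calls reproduce result.val[0] and result.val[1]: the first source contributes the lower half of each result and the second source the upper half, as stated in the description.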

Description

No description text is given for the entries in this group. Every entry lists NOP as its A64 instruction; the entries differ only in the register arrangement used to prepare the argument and to read back the result, so they are consolidated into the table below, one row per entry, in the original order.

Argument Preparation    Results             Supported architectures
a → Vd.8B               Vd.4H → result      v7/A32/A64
a → Vd.8B               Vd.2S → result      v7/A32/A64
a → Vd.8B               Vd.2S → result      v7/A32/A64
a → Vd.8B               Vd.8B → result      v7/A32/A64
a → Vd.8B               Vd.4H → result      v7/A32/A64
a → Vd.8B               Vd.2S → result      v7/A32/A64
a → Vd.8B               Vd.8B → result      v7/A32/A64
a → Vd.8B               Vd.4H → result      v7/A32/A64
a → Vd.8B               Vd.1D → result      v7/A32/A64
a → Vd.8B               Vd.1D → result      v7/A32/A64
a → Vd.8B               Vd.1D → result      A64
a → Vd.8B               Vd.1D → result      A32/A64
a → Vd.8B               Vd.4H → result      v7/A32/A64
a → Vd.4H               Vd.8B → result      v7/A32/A64
a → Vd.4H               Vd.2S → result      v7/A32/A64
a → Vd.4H               Vd.2S → result      v7/A32/A64
a → Vd.4H               Vd.8B → result      v7/A32/A64
a → Vd.4H               Vd.4H → result      v7/A32/A64
a → Vd.4H               Vd.2S → result      v7/A32/A64
a → Vd.4H               Vd.8B → result      v7/A32/A64
a → Vd.4H               Vd.4H → result      v7/A32/A64
a → Vd.4H               Vd.1D → result      v7/A32/A64
a → Vd.4H               Vd.1D → result      v7/A32/A64
a → Vd.4H               Vd.1D → result      A64
a → Vd.4H               Vd.1D → result      A32/A64
a → Vd.4H               Vd.4H → result      v7/A32/A64
a → Vd.2S               Vd.8B → result      v7/A32/A64
a → Vd.2S               Vd.4H → result      v7/A32/A64
a → Vd.2S               Vd.2S → result      v7/A32/A64
a → Vd.2S               Vd.8B → result      v7/A32/A64
a → Vd.2S               Vd.4H → result      v7/A32/A64
a → Vd.2S               Vd.2S → result      v7/A32/A64
a → Vd.2S               Vd.8B → result      v7/A32/A64
a → Vd.2S               Vd.4H → result      v7/A32/A64
a → Vd.2S               Vd.1D → result      v7/A32/A64
a → Vd.2S               Vd.1D → result      v7/A32/A64
a → Vd.2S               Vd.1D → result      A64
a → Vd.2S               Vd.1D → result      A32/A64
a → Vd.2S               Vd.4H → result      v7/A32/A64
a → Vd.2S               Vd.8B → result      v7/A32/A64
a → Vd.2S               Vd.4H → result      v7/A32/A64
a → Vd.2S               Vd.2S → result      v7/A32/A64
a → Vd.2S               Vd.8B → result      v7/A32/A64
a → Vd.2S               Vd.4H → result      v7/A32/A64
a → Vd.2S               Vd.2S → result      v7/A32/A64
a → Vd.2S               Vd.8B → result      v7/A32/A64
a → Vd.2S               Vd.4H → result      v7/A32/A64
a → Vd.2S               Vd.1D → result      v7/A32/A64
a → Vd.2S               Vd.1D → result      v7/A32/A64
a → Vd.2S               Vd.1D → result      A64
a → Vd.2S               Vd.1D → result      A32/A64
a → Vd.1D               Vd.1D → result      A64
a → Vd.2S               Vd.4H → result      v7/A32/A64
a → Vd.8B               Vd.8B → result      v7/A32/A64
a → Vd.8B               Vd.4H → result      v7/A32/A64
a → Vd.8B               Vd.2S → result      v7/A32/A64
a → Vd.8B               Vd.2S → result      v7/A32/A64
a → Vd.8B               Vd.4H → result      v7/A32/A64
a → Vd.8B               Vd.2S → result      v7/A32/A64
a → Vd.8B               Vd.8B → result      v7/A32/A64
a → Vd.8B               Vd.4H → result      v7/A32/A64
a → Vd.8B               Vd.1D → result      v7/A32/A64
a → Vd.8B               Vd.1D → result      v7/A32/A64
a → Vd.8B               Vd.1D → result      A64
a → Vd.8B               Vd.1D → result      A32/A64
a → Vd.8B               Vd.4H → result      v7/A32/A64
a → Vd.4H               Vd.8B → result      v7/A32/A64
a → Vd.4H               Vd.4H → result      v7/A32/A64
a → Vd.4H               Vd.2S → result      v7/A32/A64
a → Vd.4H               Vd.2S → result      v7/A32/A64
a → Vd.4H               Vd.8B → result      v7/A32/A64
a → Vd.4H               Vd.2S → result      v7/A32/A64
a → Vd.4H               Vd.8B → result      v7/A32/A64
a → Vd.4H               Vd.4H → result      v7/A32/A64
a → Vd.4H               Vd.1D → result      v7/A32/A64
a → Vd.4H               Vd.1D → result      v7/A32/A64
a → Vd.4H               Vd.1D → result      A64
a → Vd.4H               Vd.1D → result      A32/A64
a → Vd.4H               Vd.4H → result      v7/A32/A64
a → Vd.2S               Vd.8B → result      v7/A32/A64
a → Vd.2S               Vd.4H → result      v7/A32/A64
a → Vd.2S               Vd.2S → result      v7/A32/A64
a → Vd.2S               Vd.2S → result      v7/A32/A64
a → Vd.2S               Vd.8B → result      v7/A32/A64
a → Vd.2S               Vd.4H → result      v7/A32/A64
a → Vd.2S               Vd.8B → result      v7/A32/A64
a → Vd.2S               Vd.4H → result      v7/A32/A64
a → Vd.2S               Vd.1D → result      v7/A32/A64
a → Vd.2S               Vd.1D → result      v7/A32/A64
a → Vd.2S               Vd.1D → result      A64
a → Vd.2S               Vd.1D → result      A32/A64
a → Vd.2S               Vd.4H → result      v7/A32/A64
a → Vd.8B               Vd.8B → result      v7/A32/A64
a → Vd.8B               Vd.4H → result      v7/A32/A64
a → Vd.8B               Vd.2S → result      v7/A32/A64
a → Vd.8B               Vd.2S → result      v7/A32/A64
a → Vd.8B               Vd.8B → result      v7/A32/A64
a → Vd.8B               Vd.4H → result      v7/A32/A64
a → Vd.8B               Vd.2S → result      v7/A32/A64
a → Vd.8B               Vd.4H → result      v7/A32/A64
a → Vd.8B               Vd.1D → result      v7/A32/A64
a → Vd.8B               Vd.1D → result      v7/A32/A64
a → Vd.8B               Vd.1D → result      A64
a → Vd.8B               Vd.1D → result      A32/A64
a → Vd.8B               Vd.4H → result      v7/A32/A64
a → Vd.4H               Vd.8B → result      v7/A32/A64
a → Vd.4H               Vd.4H → result      v7/A32/A64
a → Vd.4H               Vd.2S → result      v7/A32/A64
a → Vd.4H               Vd.2S → result      v7/A32/A64
a → Vd.4H               Vd.8B → result      v7/A32/A64
a → Vd.4H               Vd.4H → result      v7/A32/A64
a → Vd.4H               Vd.2S → result      v7/A32/A64
a → Vd.4H               Vd.8B → result      v7/A32/A64
a → Vd.4H               Vd.1D → result      v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4H 

Results

Vd.1D → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4H 

Results

Vd.1D → result
+

Supported architectures

A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4H 

Results

Vd.1D → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4H 

Results

Vd.4H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1D 

Results

Vd.8B → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1D 

Results

Vd.4H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1D 

Results

Vd.2S → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1D 

Results

Vd.2S → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1D 

Results

Vd.8B → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1D 

Results

Vd.4H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1D 

Results

Vd.2S → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1D 

Results

Vd.8B → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1D 

Results

Vd.4H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1D 

Results

Vd.1D → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1D 

Results

Vd.1D → result
+

Supported architectures

A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1D 

Results

Vd.1D → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1D 

Results

Vd.4H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1D 

Results

Vd.8B → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1D 

Results

Vd.4H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1D 

Results

Vd.2S → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1D 

Results

Vd.2S → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1D 

Results

Vd.8B → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1D 

Results

Vd.4H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1D 

Results

Vd.2S → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1D 

Results

Vd.8B → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1D 

Results

Vd.4H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1D 

Results

Vd.1D → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1D 

Results

Vd.1D → result
+

Supported architectures

A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1D 

Results

Vd.1D → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1D 

Results

Vd.4H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4H 

Results

Vd.8B → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4H 

Results

Vd.4H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4H 

Results

Vd.2S → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4H 

Results

Vd.2S → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4H 

Results

Vd.8B → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4H 

Results

Vd.4H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4H 

Results

Vd.2S → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4H 

Results

Vd.8B → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4H 

Results

Vd.4H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4H 

Results

Vd.1D → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4H 

Results

Vd.1D → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4H 

Results

Vd.1D → result
+

Supported architectures

A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4H 

Results

Vd.1D → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.16B 

Results

Vd.8H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.16B 

Results

Vd.4S → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.16B 

Results

Vd.4S → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.16B 

Results

Vd.16B → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.16B 

Results

Vd.8H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.16B 

Results

Vd.4S → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.16B 

Results

Vd.16B → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.16B 

Results

Vd.8H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.16B 

Results

Vd.2D → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.16B 

Results

Vd.2D → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.16B 

Results

Vd.2D → result
+

Supported architectures

A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.16B 

Results

Vd.2D → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.16B 

Results

Vd.1Q → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.16B 

Results

Vd.8H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.16B → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.4S → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.4S → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.16B → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.8H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.4S → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.16B → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.8H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.2D → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.2D → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.2D → result
+

Supported architectures

A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.2D → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.1Q → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.8H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4S 

Results

Vd.16B → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4S 

Results

Vd.8H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4S 

Results

Vd.4S → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4S 

Results

Vd.16B → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4S 

Results

Vd.8H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4S 

Results

Vd.4S → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4S 

Results

Vd.16B → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4S 

Results

Vd.8H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4S 

Results

Vd.2D → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4S 

Results

Vd.2D → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4S 

Results

Vd.2D → result
+

Supported architectures

A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4S 

Results

Vd.2D → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4S 

Results

Vd.1Q → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4S 

Results

Vd.8H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4S 

Results

Vd.16B → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4S 

Results

Vd.8H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4S 

Results

Vd.4S → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4S 

Results

Vd.16B → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4S 

Results

Vd.8H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4S 

Results

Vd.4S → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4S 

Results

Vd.16B → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4S 

Results

Vd.8H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4S 

Results

Vd.2D → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4S 

Results

Vd.2D → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4S 

Results

Vd.2D → result
+

Supported architectures

A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4S 

Results

Vd.2D → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4S 

Results

Vd.1Q → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.2D → result
+

Supported architectures

A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1Q 

Results

Vd.2D → result
+

Supported architectures

A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4S 

Results

Vd.8H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.16B 

Results

Vd.16B → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.16B 

Results

Vd.8H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.16B 

Results

Vd.4S → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.16B 

Results

Vd.4S → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.16B 

Results

Vd.8H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.16B 

Results

Vd.4S → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.16B 

Results

Vd.16B → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.16B 

Results

Vd.8H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.16B 

Results

Vd.2D → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.16B 

Results

Vd.2D → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.16B 

Results

Vd.2D → result
+

Supported architectures

A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.16B 

Results

Vd.2D → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.16B 

Results

Vd.1Q → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.16B 

Results

Vd.8H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.16B → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.8H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.4S → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.4S → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.16B → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.4S → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.16B → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.8H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.2D → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.2D → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.2D → result
+

Supported architectures

A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.2D → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.1Q → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.8H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4S 

Results

Vd.16B → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4S 

Results

Vd.8H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4S 

Results

Vd.4S → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4S 

Results

Vd.4S → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4S 

Results

Vd.16B → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4S 

Results

Vd.8H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4S 

Results

Vd.16B → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4S 

Results

Vd.8H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4S 

Results

Vd.2D → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4S 

Results

Vd.2D → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4S 

Results

Vd.2D → result
+

Supported architectures

A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4S 

Results

Vd.2D → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4S 

Results

Vd.1Q → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.4S 

Results

Vd.8H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.16B 

Results

Vd.16B → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.16B 

Results

Vd.8H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.16B 

Results

Vd.4S → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.16B 

Results

Vd.4S → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.16B 

Results

Vd.16B → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.16B 

Results

Vd.8H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.16B 

Results

Vd.4S → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.16B 

Results

Vd.8H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.16B 

Results

Vd.2D → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.16B 

Results

Vd.2D → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.16B 

Results

Vd.2D → result
+

Supported architectures

A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.16B 

Results

Vd.2D → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.16B 

Results

Vd.1Q → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.16B 

Results

Vd.8H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.16B → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.8H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.4S → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.4S → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.16B → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.8H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.4S → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.16B → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.2D → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.2D → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.2D → result
+

Supported architectures

A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.2D → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.1Q → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.8H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.16B → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.8H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.4S → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.4S → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.16B → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.8H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.4S → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.16B → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.8H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.2D → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.2D → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.2D → result
+

Supported architectures

A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.2D → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1Q 

Results

Vd.2D → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.2D → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1Q 

Results

Vd.2D → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.8H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.16B → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.8H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.4S → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.4S → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.16B → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.8H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.4S → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.16B → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.8H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.2D → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.2D → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.8H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.16B → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.8H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.4S → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.4S → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.16B → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.8H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.4S → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.16B → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.8H → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.2D → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.2D → result
+

Supported architectures

v7/A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.2D → result
+

Supported architectures

A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.2D → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.8H 

Results

Vd.1Q → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1D 

Results

Vd.8B → result
+

Supported architectures

A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1D 

Results

Vd.4H → result
+

Supported architectures

A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1D 

Results

Vd.2S → result
+

Supported architectures

A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1D 

Results

Vd.8B → result
+

Supported architectures

A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1D 

Results

Vd.4H → result
+

Supported architectures

A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1D 

Results

Vd.2S → result
+

Supported architectures

A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1D 

Results

Vd.8B → result
+

Supported architectures

A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1D 

Results

Vd.4H → result
+

Supported architectures

A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1D 

Results

Vd.1D → result
+

Supported architectures

A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1D 

Results

Vd.1D → result
+

Supported architectures

A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1D 

Results

Vd.4H → result
+

Supported architectures

A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1D 

Results

Vd.2S → result
+

Supported architectures

A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.16B → result
+

Supported architectures

A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.8H → result
+

Supported architectures

A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.4S → result
+

Supported architectures

A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.16B → result
+

Supported architectures

A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.8H → result
+

Supported architectures

A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.4S → result
+

Supported architectures

A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.16B → result
+

Supported architectures

A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.8H → result
+

Supported architectures

A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.2D → result
+

Supported architectures

A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.2D → result
+

Supported architectures

A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.8H → result
+

Supported architectures

A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.4S → result
+

Supported architectures

A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1D 

Results

Vd.8B → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1D 

Results

Vd.4H → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1D 

Results

Vd.2S → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1D 

Results

Vd.8B → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1D 

Results

Vd.4H → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1D 

Results

Vd.2S → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1D 

Results

Vd.8B → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1D 

Results

Vd.4H → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1D 

Results

Vd.1D → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1D 

Results

Vd.1D → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1D 

Results

Vd.1D → result
+

Supported architectures

A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1D 

Results

Vd.4H → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.16B → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.8H → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.4S → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.16B → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.8H → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.4S → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.16B → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.8H → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.2D → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.2D → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.2D → result
+

Supported architectures

A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.2D 

Results

Vd.8H → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1Q 

Results

Vd.16B → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1Q 

Results

Vd.8H → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1Q 

Results

Vd.4S → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1Q 

Results

Vd.16B → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1Q 

Results

Vd.8H → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1Q 

Results

Vd.4S → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1Q 

Results

Vd.16B → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1Q 

Results

Vd.8H → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1Q 

Results

Vd.2D → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1Q 

Results

Vd.2D → result
+

Supported architectures

A32/A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1Q 

Results

Vd.2D → result
+

Supported architectures

A64

Description

A64 Instruction

NOP 
+

Argument Preparation

a → Vd.1Q 

Results

Vd.8H → result
+

Supported architectures

A32/A64

Description

Load SIMD&FP Register (register offset). This instruction loads a SIMD&FP register from memory. The address that is used for the load is calculated from a base register value and an offset register value. The offset can be optionally shifted and extended.

+

A64 Instruction

LDR Qd,[Xn]
+

Argument Preparation

ptr → Xn 

Results

Qd → result
+

Operation

+
bits(64) offset = ExtendReg(m, extend_type, shift);
+if HaveMTEExt() then
+    boolean is_load_store = memop IN {MemOp_STORE, MemOp_LOAD};
+    SetNotTagCheckedInstruction(is_load_store && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+bits(64) address;
+bits(datasize) data;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+address = address + offset;
+
+case memop of
+    when MemOp_STORE
+        data = V[t];
+        Mem[address, datasize DIV 8, AccType_VEC] = data;
+
+    when MemOp_LOAD
+        data = Mem[address, datasize DIV 8, AccType_VEC];
+        V[t] = data;
+

Supported architectures

A32/A64

Description

Store SIMD&FP register (register offset). This instruction stores a single SIMD&FP register to memory. The address that is used for the store is calculated from a base register value and an offset register value. The offset can be optionally shifted and extended.

+

A64 Instruction

STR Qt,[Xn]
+

Argument Preparation

ptr → Xn 
+val → Qt

Results

void → result
+

Operation

+
bits(64) offset = ExtendReg(m, extend_type, shift);
+if HaveMTEExt() then
+    boolean is_load_store = memop IN {MemOp_STORE, MemOp_LOAD};
+    SetNotTagCheckedInstruction(is_load_store && n == 31);
+
+CheckFPAdvSIMDEnabled64();
+bits(64) address;
+bits(datasize) data;
+
+if n == 31 then
+    CheckSPAlignment();
+    address = SP[];
+else
+    address = X[n];
+
+address = address + offset;
+
+case memop of
+    when MemOp_STORE
+        data = V[t];
+        Mem[address, datasize DIV 8, AccType_VEC] = data;
+
+    when MemOp_LOAD
+        data = Mem[address, datasize DIV 8, AccType_VEC];
+        V[t] = data;
+

Supported architectures

A32/A64
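
Example (Rust, non-normative)

A hedged sketch of a whole-Q-register load and store from Rust. It uses the vld1q_u8/vst1q_u8 intrinsic names as stand-ins; the two entries above document the raw LDR/STR forms, and the exact intrinsic that maps to them is not shown here, so treat the names and the gating as assumptions.

// Round-trips 16 bytes through a Q register: LDR Qd,[Xn] then STR Qt,[Xn].
#[cfg(target_arch = "aarch64")]
unsafe fn copy_q_register(src: &[u8; 16], dst: &mut [u8; 16]) {
    use std::arch::aarch64::{vld1q_u8, vst1q_u8};
    let q = vld1q_u8(src.as_ptr()); // load into a 128-bit SIMD&FP register
    vst1q_u8(dst.as_mut_ptr(), q);  // store it back out
}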

Description

AES single round encryption.

+

A64 Instruction

AESE Vd.16B,Vn.16B
+

Argument Preparation

data → Vd.16B 
+key → Vn.16B

Results

Vd.16B → result
+

Operation

+
AArch64.CheckFPAdvSIMDEnabled();
+
+bits(128) operand1 = V[d];
+bits(128) operand2 = V[n];
+bits(128) result;
+result = operand1 EOR operand2;
+result = AESSubBytes(AESShiftRows(result));
+
+V[d] = result;
+

Supported architectures

A32/A64

Description

AES single round decryption.

+

A64 Instruction

AESD Vd.16B,Vn.16B
+

Argument Preparation

data → Vd.16B 
+key → Vn.16B

Results

Vd.16B → result
+

Operation

+
AArch64.CheckFPAdvSIMDEnabled();
+
+bits(128) operand1 = V[d];
+bits(128) operand2 = V[n];
+bits(128) result;
+result = operand1 EOR operand2;
+result = AESInvSubBytes(AESInvShiftRows(result));
+V[d] = result;
+

Supported architectures

A32/A64

Description

AES mix columns.

+

A64 Instruction

AESMC Vd.16B,Vn.16B
+

Argument Preparation

data → Vn.16B 

Results

Vd.16B → result
+

Operation

+
AArch64.CheckFPAdvSIMDEnabled();
+
+bits(128) operand = V[n];
+bits(128) result;
+result = AESMixColumns(operand);
+V[d] = result;
+

Supported architectures

A32/A64

Description

AES inverse mix columns.

+

A64 Instruction

AESIMC Vd.16B,Vn.16B
+

Argument Preparation

data → Vn.16B 

Results

Vd.16B → result
+

Operation

+
AArch64.CheckFPAdvSIMDEnabled();
+
+bits(128) operand = V[n];
+bits(128) result;
+result = AESInvMixColumns(operand);
+V[d] = result;
+

Supported architectures

A32/A64
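
Example (Rust, non-normative)

The four AES entries above compose into full rounds: AESE followed by AESMC is one forward round (AddRoundKey, SubBytes, ShiftRows, MixColumns), and AESD followed by AESIMC is one inverse round. A minimal sketch, assuming the usual vaeseq_u8/vaesmcq_u8/vaesdq_u8/vaesimcq_u8 intrinsic names; the caller must ensure the crypto feature is actually available (for example via run-time detection), otherwise the calls are undefined behaviour.

// One forward and one inverse AES round built from the instructions above.
#[cfg(target_arch = "aarch64")]
unsafe fn aes_round_pair(
    state: std::arch::aarch64::uint8x16_t,
    round_key: std::arch::aarch64::uint8x16_t,
) -> (std::arch::aarch64::uint8x16_t, std::arch::aarch64::uint8x16_t) {
    use std::arch::aarch64::{vaesdq_u8, vaeseq_u8, vaesimcq_u8, vaesmcq_u8};
    let enc = vaesmcq_u8(vaeseq_u8(state, round_key));  // AESE + AESMC
    let dec = vaesimcq_u8(vaesdq_u8(state, round_key)); // AESD + AESIMC
    (enc, dec)
}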

Description

SHA1 hash update (choose).

+

A64 Instruction

SHA1C Qd,Sn,Vm.4S
+

Argument Preparation

hash_abcd → Qd 
+hash_e → Sn
+wk → Vm.4S

Results

Qd → result
+

Operation

+
AArch64.CheckFPAdvSIMDEnabled();
+
+bits(128) X = V[d];
+bits(32) Y = V[n];    // Note: 32 not 128 bits wide
+bits(128) W = V[m];
+bits(32) t;
+
+for e = 0 to 3
+    t = SHAchoose(X<63:32>, X<95:64>, X<127:96>);
+    Y = Y + ROL(X<31:0>, 5) + t + Elem[W, e, 32];
+    X<63:32> = ROL(X<63:32>, 30);
+    <Y, X> = ROL(Y:X, 32);
+V[d] = X;
+

Supported architectures

A32/A64

Description

SHA1 hash update (parity).

+

A64 Instruction

SHA1P Qd,Sn,Vm.4S
+

Argument Preparation

hash_abcd → Qd 
+hash_e → Sn
+wk → Vm.4S

Results

Qd → result
+

Operation

+
AArch64.CheckFPAdvSIMDEnabled();
+
+bits(128) X = V[d];
+bits(32) Y = V[n];    // Note: 32 not 128 bits wide
+bits(128) W = V[m];
+bits(32) t;
+
+for e = 0 to 3
+    t = SHAparity(X<63:32>, X<95:64>, X<127:96>);
+    Y = Y + ROL(X<31:0>, 5) + t + Elem[W, e, 32];
+    X<63:32> = ROL(X<63:32>, 30);
+    <Y, X> = ROL(Y:X, 32);
+V[d] = X;
+

Supported architectures

A32/A64

Description

SHA1 hash update (majority).

+

A64 Instruction

SHA1M Qd,Sn,Vm.4S
+

Argument Preparation

hash_abcd → Qd 
+hash_e → Sn
+wk → Vm.4S

Results

Qd → result
+

Operation

+
AArch64.CheckFPAdvSIMDEnabled();
+
+bits(128) X = V[d];
+bits(32) Y = V[n];    // Note: 32 not 128 bits wide
+bits(128) W = V[m];
+bits(32) t;
+
+for e = 0 to 3
+    t = SHAmajority(X<63:32>, X<95:64>, X<127:96>);
+    Y = Y + ROL(X<31:0>, 5) + t + Elem[W, e, 32];
+    X<63:32> = ROL(X<63:32>, 30);
+    <Y, X> = ROL(Y:X, 32);
+V[d] = X;
+

Supported architectures

A32/A64

Description

SHA1 fixed rotate.

+

A64 Instruction

SHA1H Sd,Sn
+

Argument Preparation

hash_e → Sn 

Results

Sd → result
+

Operation

+
AArch64.CheckFPAdvSIMDEnabled();
+
+bits(32) operand = V[n];    // read element [0] only,  [1-3] zeroed
+V[d] = ROL(operand, 30);
+

Supported architectures

A32/A64
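
Example (Rust, non-normative)

SHA1C, SHA1P and SHA1M share one calling pattern: SHA1H rotates lane 0 of the current state to produce the e input for a later group of rounds, while the chosen update instruction advances a/b/c/d by four rounds. A sketch of the "choose" case, assuming the vsha1cq_u32, vsha1h_u32 and vgetq_lane_u32 intrinsic names with the const-generic lane signature.

// Four SHA-1 rounds of the choose phase; wk holds four message words
// with the round constant already added.
#[cfg(target_arch = "aarch64")]
unsafe fn sha1_choose_rounds(
    abcd: std::arch::aarch64::uint32x4_t,
    e: u32,
    wk: std::arch::aarch64::uint32x4_t,
) -> (std::arch::aarch64::uint32x4_t, u32) {
    use std::arch::aarch64::{vgetq_lane_u32, vsha1cq_u32, vsha1h_u32};
    let e_out = vsha1h_u32(vgetq_lane_u32::<0>(abcd)); // SHA1H: fixed rotate of lane 0
    let abcd_out = vsha1cq_u32(abcd, e, wk);           // SHA1C: choose update
    (abcd_out, e_out)
}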

Description

SHA1 schedule update 0.

+

A64 Instruction

SHA1SU0 Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

w0_3 → Vd.4S 
+w4_7 → Vn.4S
+w8_11 → Vm.4S

Results

Vd.4S → result
+

Operation

+
AArch64.CheckFPAdvSIMDEnabled();
+
+bits(128) operand1 = V[d];
+bits(128) operand2 = V[n];
+bits(128) operand3 = V[m];
+bits(128) result;
+
+result = operand2<63:0>:operand1<127:64>;
+result = result EOR operand1 EOR operand3;
+V[d] = result;
+

Supported architectures

A32/A64

Description

SHA1 schedule update 1.

+

A64 Instruction

SHA1SU1 Vd.4S,Vn.4S
+

Argument Preparation

tw0_3 → Vd.4S 
+w12_15 → Vn.4S

Results

Vd.4S → result
+

Operation

+
AArch64.CheckFPAdvSIMDEnabled();
+
+bits(128) operand1 = V[d];
+bits(128) operand2 = V[n];
+bits(128) result;
+bits(128) T = operand1 EOR LSR(operand2, 32);
+result<31:0> = ROL(T<31:0>, 1);
+result<63:32> = ROL(T<63:32>, 1);
+result<95:64> = ROL(T<95:64>, 1);
+result<127:96> = ROL(T<127:96>, 1) EOR ROL(T<31:0>, 2);
+V[d] = result;
+

Supported architectures

A32/A64
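
Example (Rust, non-normative)

SHA1SU0 and SHA1SU1 are used as a pair to extend the SHA-1 message schedule by four words at a time. A sketch under the assumption that the intrinsics are exposed as vsha1su0q_u32 and vsha1su1q_u32.

// Computes W[t..t+4] from the previous sixteen schedule words.
#[cfg(target_arch = "aarch64")]
unsafe fn sha1_schedule_update(
    w0_3: std::arch::aarch64::uint32x4_t,
    w4_7: std::arch::aarch64::uint32x4_t,
    w8_11: std::arch::aarch64::uint32x4_t,
    w12_15: std::arch::aarch64::uint32x4_t,
) -> std::arch::aarch64::uint32x4_t {
    use std::arch::aarch64::{vsha1su0q_u32, vsha1su1q_u32};
    let partial = vsha1su0q_u32(w0_3, w4_7, w8_11); // SHA1SU0: partial update
    vsha1su1q_u32(partial, w12_15)                  // SHA1SU1: fold in the newest words
}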

Description

SHA256 hash update (part 1).

+

A64 Instruction

SHA256H Qd,Qn,Vm.4S
+

Argument Preparation

hash_abcd → Qd 
+hash_efgh → Qn
+wk → Vm.4S

Results

Qd → result
+

Operation

+
AArch64.CheckFPAdvSIMDEnabled();
+
+bits(128) result;
+result = SHA256hash(V[d], V[n], V[m], TRUE);
+V[d] = result;
+

Supported architectures

A32/A64

Description

SHA256 hash update (part 2).

+

A64 Instruction

SHA256H2 Qd,Qn,Vm.4S
+

Argument Preparation

hash_efgh → Qd 
+hash_abcd → Qn
+wk → Vm.4S

Results

Qd → result
+

Operation

+
AArch64.CheckFPAdvSIMDEnabled();
+
+bits(128) result;
+result = SHA256hash(V[n], V[d], V[m], FALSE);
+V[d] = result;
+

Supported architectures

A32/A64

Description

SHA256 schedule update 0.

+

A64 Instruction

SHA256SU0 Vd.4S,Vn.4S
+

Argument Preparation

w0_3 → Vd.4S 
+w4_7 → Vn.4S

Results

Vd.4S → result
+

Operation

+
AArch64.CheckFPAdvSIMDEnabled();
+
+bits(128) operand1 = V[d];
+bits(128) operand2 = V[n];
+bits(128) result;
+bits(128) T = operand2<31:0>:operand1<127:32>;
+bits(32) elt;
+
+for e = 0 to 3
+    elt = Elem[T, e, 32];
+    elt = ROR(elt, 7) EOR ROR(elt, 18) EOR LSR(elt, 3);
+    Elem[result, e, 32] = elt + Elem[operand1, e, 32];
+V[d] = result;
+

Supported architectures

A32/A64

Description

SHA256 schedule update 1.

+

A64 Instruction

SHA256SU1 Vd.4S,Vn.4S,Vm.4S
+

Argument Preparation

tw0_3 → Vd.4S 
+w8_11 → Vn.4S
+w12_15 → Vm.4S

Results

Vd.4S → result
+

Operation

+
AArch64.CheckFPAdvSIMDEnabled();
+
+bits(128) operand1 = V[d];
+bits(128) operand2 = V[n];
+bits(128) operand3 = V[m];
+bits(128) result;
+bits(128) T0 = operand3<31:0>:operand2<127:32>;
+bits(64) T1;
+bits(32) elt;
+
+T1 = operand3<127:64>;
+for e = 0 to 1
+    elt = Elem[T1, e, 32];
+    elt = ROR(elt, 17) EOR ROR(elt, 19) EOR LSR(elt, 10);
+    elt = elt + Elem[operand1, e, 32] + Elem[T0, e, 32];
+    Elem[result, e, 32] = elt;
+
+T1 = result<63:0>;
+for e = 2 to 3
+    elt = Elem[T1, e-2, 32];
+    elt = ROR(elt, 17) EOR ROR(elt, 19) EOR LSR(elt, 10);
+    elt = elt + Elem[operand1, e, 32] + Elem[T0, e, 32];
+    Elem[result, e, 32] = elt;
+
+V[d] = result;
+

Supported architectures

A32/A64
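
Example (Rust, non-normative)

The four SHA-256 entries split the work the same way as the SHA-1 ones: SHA256SU0/SHA256SU1 extend the message schedule and SHA256H/SHA256H2 advance the two state halves by four rounds. A sketch assuming the vsha256hq_u32/vsha256h2q_u32/vsha256su0q_u32/vsha256su1q_u32 and vaddq_u32 intrinsic names; k stands for the vector of round constants for these four rounds.

// One four-round step: extend the schedule, then update both state halves.
#[cfg(target_arch = "aarch64")]
unsafe fn sha256_four_rounds(
    abcd: std::arch::aarch64::uint32x4_t,
    efgh: std::arch::aarch64::uint32x4_t,
    w0_3: std::arch::aarch64::uint32x4_t,
    w4_7: std::arch::aarch64::uint32x4_t,
    w8_11: std::arch::aarch64::uint32x4_t,
    w12_15: std::arch::aarch64::uint32x4_t,
    k: std::arch::aarch64::uint32x4_t,
) -> (
    std::arch::aarch64::uint32x4_t,
    std::arch::aarch64::uint32x4_t,
    std::arch::aarch64::uint32x4_t,
) {
    use std::arch::aarch64::{
        vaddq_u32, vsha256h2q_u32, vsha256hq_u32, vsha256su0q_u32, vsha256su1q_u32,
    };
    // SHA256SU0 + SHA256SU1: W[16..20] from the previous sixteen words.
    let w16_19 = vsha256su1q_u32(vsha256su0q_u32(w0_3, w4_7), w8_11, w12_15);
    // SHA256H/SHA256H2: fold W[0..4] + K into the state.
    let wk = vaddq_u32(w0_3, k);
    let abcd_out = vsha256hq_u32(abcd, efgh, wk);
    let efgh_out = vsha256h2q_u32(efgh, abcd, wk);
    (abcd_out, efgh_out, w16_19)
}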

Description

Polynomial Multiply Long. This instruction multiplies corresponding elements in the lower or upper half of the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

PMULL Vd.1Q,Vn.1D,Vm.1D
+

Argument Preparation

a → Vn.1D 
+b → Vm.1D

Results

Vd.1Q → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    Elem[result, e, 2*esize] = PolynomialMult(element1, element2);
+
+V[d] = result;
+

Supported architectures

A32/A64

Description

Polynomial Multiply Long. This instruction multiplies corresponding elements in the lower or upper half of the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

+

A64 Instruction

PMULL2 Vd.1Q,Vn.2D,Vm.2D
+

Argument Preparation

a → Vn.2D 
+b → Vm.2D

Results

Vd.1Q → result
+

Operation

+
CheckFPAdvSIMDEnabled64();
+bits(datasize) operand1 = Vpart[n, part];
+bits(datasize) operand2 = Vpart[m, part];
+bits(2*datasize) result;
+bits(esize) element1;
+bits(esize) element2;
+
+for e = 0 to elements-1
+    element1 = Elem[operand1, e, esize];
+    element2 = Elem[operand2, e, esize];
+    Elem[result, e, 2*esize] = PolynomialMult(element1, element2);
+
+V[d] = result;
+

Supported architectures

A32/A64
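
Example (Rust, non-normative)

PMULL and PMULL2 perform a 64 x 64 -> 128-bit carry-less (polynomial) multiplication, the primitive behind GHASH/GCM and several CRC folding schemes; PMULL2 simply takes its operands from the upper halves of the source vectors. A sketch assuming the vmull_p64 intrinsic and that the p64/p128 polynomial types are plain 64-bit and 128-bit carriers.

// Carry-less multiply of two 64-bit values (requires the pmull feature).
#[cfg(target_arch = "aarch64")]
unsafe fn clmul_u64(a: u64, b: u64) -> u128 {
    use std::arch::aarch64::vmull_p64;
    vmull_p64(a, b) as u128
}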

Description

CRC32 checksum performs a cyclic redundancy check (CRC) calculation on a value held in a general-purpose register. It takes an input CRC value in the first source operand, performs a CRC on the input value in the second source operand, and returns the output CRC value. The second source operand can be 8, 16, 32, or 64 bits. To align with common usage, the bit order of the values is reversed as part of the operation, and the polynomial 0x04C11DB7 is used for the CRC calculation.

+

A64 Instruction

CRC32B Wd,Wn,Wm
+

Argument Preparation

a → Wn 
+b → Wm

Results

Wd → result
+

Operation

+
bits(32) acc = X[n];    // accumulator
+bits(size) val = X[m];    // input value
+bits(32) poly = 0x04C11DB7<31:0>;
+
+bits(32+size) tempacc = BitReverse(acc):Zeros(size);
+bits(size+32) tempval = BitReverse(val):Zeros(32);
+
+// Poly32Mod2 on a bitstring does a polynomial Modulus over {0,1} operation
+X[d] = BitReverse(Poly32Mod2(tempacc EOR tempval, poly));
+

Supported architectures

A32/A64

Description

CRC32 checksum performs a cyclic redundancy check (CRC) calculation on a value held in a general-purpose register. It takes an input CRC value in the first source operand, performs a CRC on the input value in the second source operand, and returns the output CRC value. The second source operand can be 8, 16, 32, or 64 bits. To align with common usage, the bit order of the values is reversed as part of the operation, and the polynomial 0x04C11DB7 is used for the CRC calculation.

+

A64 Instruction

CRC32H Wd,Wn,Wm
+

Argument Preparation

a → Wn 
+b → Wm

Results

Wd → result
+

Operation

+
bits(32) acc = X[n];    // accumulator
+bits(size) val = X[m];    // input value
+bits(32) poly = 0x04C11DB7<31:0>;
+
+bits(32+size) tempacc = BitReverse(acc):Zeros(size);
+bits(size+32) tempval = BitReverse(val):Zeros(32);
+
+// Poly32Mod2 on a bitstring does a polynomial Modulus over {0,1} operation
+X[d] = BitReverse(Poly32Mod2(tempacc EOR tempval, poly));
+

Supported architectures

A32/A64

Description

CRC32 checksum performs a cyclic redundancy check (CRC) calculation on a value held in a general-purpose register. It takes an input CRC value in the first source operand, performs a CRC on the input value in the second source operand, and returns the output CRC value. The second source operand can be 8, 16, 32, or 64 bits. To align with common usage, the bit order of the values is reversed as part of the operation, and the polynomial 0x04C11DB7 is used for the CRC calculation.

+

A64 Instruction

CRC32W Wd,Wn,Wm
+

Argument Preparation

a → Wn 
+b → Wm

Results

Wd → result
+

Operation

+
bits(32) acc = X[n];    // accumulator
+bits(size) val = X[m];    // input value
+bits(32) poly = 0x04C11DB7<31:0>;
+
+bits(32+size) tempacc = BitReverse(acc):Zeros(size);
+bits(size+32) tempval = BitReverse(val):Zeros(32);
+
+// Poly32Mod2 on a bitstring does a polynomial Modulus over {0,1} operation
+X[d] = BitReverse(Poly32Mod2(tempacc EOR tempval, poly));
+

Supported architectures

A32/A64

Description

CRC32 checksum performs a cyclic redundancy check (CRC) calculation on a value held in a general-purpose register. It takes an input CRC value in the first source operand, performs a CRC on the input value in the second source operand, and returns the output CRC value. The second source operand can be 8, 16, 32, or 64 bits. To align with common usage, the bit order of the values is reversed as part of the operation, and the polynomial 0x04C11DB7 is used for the CRC calculation.

+

A64 Instruction

CRC32X Wd,Wn,Xm
+

Argument Preparation

a → Wn 
+b → Xm

Results

Wd → result
+

Operation

+
bits(32) acc = X[n];    // accumulator
+bits(size) val = X[m];    // input value
+bits(32) poly = 0x04C11DB7<31:0>;
+
+bits(32+size) tempacc = BitReverse(acc):Zeros(size);
+bits(size+32) tempval = BitReverse(val):Zeros(32);
+
+// Poly32Mod2 on a bitstring does a polynomial Modulus over {0,1} operation
+X[d] = BitReverse(Poly32Mod2(tempacc EOR tempval, poly));
+

Supported architectures

A32/A64
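
Example (Rust, non-normative)

The four widths above are usually combined: wide chunks go through CRC32X and any tail bytes through CRC32B. A sketch over a byte slice, assuming the __crc32d/__crc32b intrinsic names and that the caller has already verified the crc feature (the calls are undefined behaviour without it).

// CRC-32 (polynomial 0x04C11DB7) over a byte slice, eight bytes at a time.
#[cfg(target_arch = "aarch64")]
unsafe fn crc32_update(mut crc: u32, data: &[u8]) -> u32 {
    use std::arch::aarch64::{__crc32b, __crc32d};
    let mut chunks = data.chunks_exact(8);
    for chunk in &mut chunks {
        let mut word = [0u8; 8];
        word.copy_from_slice(chunk);
        crc = __crc32d(crc, u64::from_le_bytes(word)); // CRC32X
    }
    for &byte in chunks.remainder() {
        crc = __crc32b(crc, byte); // CRC32B
    }
    crc
}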

Description

CRC32 checksum performs a cyclic redundancy check (CRC) calculation on a value held in a general-purpose register. It takes an input CRC value in the first source operand, performs a CRC on the input value in the second source operand, and returns the output CRC value. The second source operand can be 8, 16, 32, or 64 bits. To align with common usage, the bit order of the values is reversed as part of the operation, and the polynomial 0x1EDC6F41 is used for the CRC calculation.

+

A64 Instruction

CRC32CB Wd,Wn,Wm
+

Argument Preparation

a → Wn 
+b → Wm

Results

Wd → result
+

Operation

+
bits(32) acc = X[n];    // accumulator
+bits(size) val = X[m];    // input value
+bits(32) poly = 0x1EDC6F41<31:0>;
+
+bits(32+size) tempacc = BitReverse(acc):Zeros(size);
+bits(size+32) tempval = BitReverse(val):Zeros(32);
+
+// Poly32Mod2 on a bitstring does a polynomial Modulus over {0,1} operation
+X[d] = BitReverse(Poly32Mod2(tempacc EOR tempval, poly));
+

Supported architectures

A32/A64

Description

CRC32 checksum performs a cyclic redundancy check (CRC) calculation on a value held in a general-purpose register. It takes an input CRC value in the first source operand, performs a CRC on the input value in the second source operand, and returns the output CRC value. The second source operand can be 8, 16, 32, or 64 bits. To align with common usage, the bit order of the values is reversed as part of the operation, and the polynomial 0x1EDC6F41 is used for the CRC calculation.

+

A64 Instruction

CRC32CH Wd,Wn,Wm
+

Argument Preparation

a → Wn 
+b → Wm

Results

Wd → result
+

Operation

+
bits(32) acc = X[n];    // accumulator
+bits(size) val = X[m];    // input value
+bits(32) poly = 0x1EDC6F41<31:0>;
+
+bits(32+size) tempacc = BitReverse(acc):Zeros(size);
+bits(size+32) tempval = BitReverse(val):Zeros(32);
+
+// Poly32Mod2 on a bitstring does a polynomial Modulus over {0,1} operation
+X[d] = BitReverse(Poly32Mod2(tempacc EOR tempval, poly));
+

Supported architectures

A32/A64

Description

CRC32C checksum performs a cyclic redundancy check (CRC) calculation on a value held in a general-purpose register. It takes an input CRC value in the first source operand, performs a CRC on the input value in the second source operand, and returns the output CRC value. The second source operand can be 8, 16, 32, or 64 bits. To align with common usage, the bit order of the values is reversed as part of the operation, and the polynomial 0x1EDC6F41 is used for the CRC calculation.

A64 Instruction

CRC32CW Wd,Wn,Wm

Argument Preparation

a → Wn
b → Wm

Results

Wd → result

Operation

bits(32) acc = X[n];    // accumulator
bits(size) val = X[m];    // input value
bits(32) poly = 0x1EDC6F41<31:0>;

bits(32+size) tempacc = BitReverse(acc):Zeros(size);
bits(size+32) tempval = BitReverse(val):Zeros(32);

// Poly32Mod2 on a bitstring does a polynomial Modulus over {0,1} operation
X[d] = BitReverse(Poly32Mod2(tempacc EOR tempval, poly));

Supported architectures

A32/A64

Description

CRC32C checksum performs a cyclic redundancy check (CRC) calculation on a value held in a general-purpose register. It takes an input CRC value in the first source operand, performs a CRC on the input value in the second source operand, and returns the output CRC value. The second source operand can be 8, 16, 32, or 64 bits. To align with common usage, the bit order of the values is reversed as part of the operation, and the polynomial 0x1EDC6F41 is used for the CRC calculation.

A64 Instruction

CRC32CX Wd,Wn,Xm

Argument Preparation

a → Wn
b → Xm

Results

Wd → result

Operation

bits(32) acc = X[n];    // accumulator
bits(size) val = X[m];    // input value
bits(32) poly = 0x1EDC6F41<31:0>;

bits(32+size) tempacc = BitReverse(acc):Zeros(size);
bits(size+32) tempval = BitReverse(val):Zeros(32);

// Poly32Mod2 on a bitstring does a polynomial Modulus over {0,1} operation
X[d] = BitReverse(Poly32Mod2(tempacc EOR tempval, poly));

Supported architectures

A32/A64
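
Where these instructions are exposed as compiler intrinsics, callers typically guard the hardware path with run-time feature detection and keep a portable fallback. The sketch below assumes an `__crc32cw` intrinsic in `core::arch::aarch64` and that the `is_aarch64_feature_detected!` macro is in scope with a "crc" feature string; both are assumptions for illustration, not something this documentation guarantees.

#[cfg(target_arch = "aarch64")]
fn crc32c_word(acc: u32, val: u32) -> u32 {
    // Assumed intrinsic and feature name; verify against the actual API.
    if is_aarch64_feature_detected!("crc") {
        unsafe { core::arch::aarch64::__crc32cw(acc, val) }
    } else {
        // Portable fallback mirroring the Operation pseudocode: 32 division
        // steps with the reflected Castagnoli polynomial.
        let mut acc = acc ^ val;
        for _ in 0..32 {
            acc = if acc & 1 != 0 { (acc >> 1) ^ 0x82F6_3B78 } else { acc >> 1 };
        }
        acc
    }
}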

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/library/stdarch/crates/stdarch-verify/build.rs b/library/stdarch/crates/stdarch-verify/build.rs new file mode 100644 index 00000000000..c0dc81b6a61 --- /dev/null +++ b/library/stdarch/crates/stdarch-verify/build.rs @@ -0,0 +1,28 @@ +use std::path::Path; + +fn main() { + let dir = Path::new(env!("CARGO_MANIFEST_DIR")); + let root = dir.parent().unwrap(); + eprintln!("root: {}", root.display()); + walk(&root.join("core_arch/src/x86")); + walk(&root.join("core_arch/src/x86_64")); + walk(&root.join("core_arch/src/arm")); + walk(&root.join("core_arch/src/aarch64")); +} + +fn walk(root: &Path) { + for file in root.read_dir().unwrap() { + eprintln!("root: {}", root.display()); + let file = file.unwrap(); + if file.file_type().unwrap().is_dir() { + walk(&file.path()); + continue; + } + let path = file.path(); + if path.extension().and_then(|s| s.to_str()) != Some("rs") { + continue; + } + + println!("cargo:rerun-if-changed={}", path.display()); + } +} diff --git a/library/stdarch/crates/stdarch-verify/mips-msa.h b/library/stdarch/crates/stdarch-verify/mips-msa.h new file mode 100644 index 00000000000..881f1918f6b --- /dev/null +++ b/library/stdarch/crates/stdarch-verify/mips-msa.h @@ -0,0 +1,707 @@ +v16i8 __builtin_msa_add_a_b (v16i8, v16i8); +v8i16 __builtin_msa_add_a_h (v8i16, v8i16); +v4i32 __builtin_msa_add_a_w (v4i32, v4i32); +v2i64 __builtin_msa_add_a_d (v2i64, v2i64); + +v16i8 __builtin_msa_adds_a_b (v16i8, v16i8); +v8i16 __builtin_msa_adds_a_h (v8i16, v8i16); +v4i32 __builtin_msa_adds_a_w (v4i32, v4i32); +v2i64 __builtin_msa_adds_a_d (v2i64, v2i64); + +v16i8 __builtin_msa_adds_s_b (v16i8, v16i8); +v8i16 __builtin_msa_adds_s_h (v8i16, v8i16); +v4i32 __builtin_msa_adds_s_w (v4i32, v4i32); +v2i64 __builtin_msa_adds_s_d (v2i64, v2i64); + +v16u8 __builtin_msa_adds_u_b (v16u8, v16u8); +v8u16 __builtin_msa_adds_u_h (v8u16, v8u16); +v4u32 __builtin_msa_adds_u_w (v4u32, v4u32); +v2u64 __builtin_msa_adds_u_d (v2u64, v2u64); + +v16i8 __builtin_msa_addv_b (v16i8, v16i8); +v8i16 __builtin_msa_addv_h (v8i16, v8i16); +v4i32 __builtin_msa_addv_w (v4i32, v4i32); +v2i64 __builtin_msa_addv_d (v2i64, v2i64); + +v16i8 __builtin_msa_addvi_b (v16i8, imm0_31); +v8i16 __builtin_msa_addvi_h (v8i16, imm0_31); +v4i32 __builtin_msa_addvi_w (v4i32, imm0_31); +v2i64 __builtin_msa_addvi_d (v2i64, imm0_31); + +v16u8 __builtin_msa_and_v (v16u8, v16u8); + +v16u8 __builtin_msa_andi_b (v16u8, imm0_255); + +v16i8 __builtin_msa_asub_s_b (v16i8, v16i8); +v8i16 __builtin_msa_asub_s_h (v8i16, v8i16); +v4i32 __builtin_msa_asub_s_w (v4i32, v4i32); +v2i64 __builtin_msa_asub_s_d (v2i64, v2i64); + +v16u8 __builtin_msa_asub_u_b (v16u8, v16u8); +v8u16 __builtin_msa_asub_u_h (v8u16, v8u16); +v4u32 __builtin_msa_asub_u_w (v4u32, v4u32); +v2u64 __builtin_msa_asub_u_d (v2u64, v2u64); + +v16i8 __builtin_msa_ave_s_b (v16i8, v16i8); +v8i16 __builtin_msa_ave_s_h (v8i16, v8i16); +v4i32 __builtin_msa_ave_s_w (v4i32, v4i32); +v2i64 __builtin_msa_ave_s_d (v2i64, v2i64); + +v16u8 __builtin_msa_ave_u_b (v16u8, v16u8); +v8u16 __builtin_msa_ave_u_h (v8u16, v8u16); +v4u32 __builtin_msa_ave_u_w (v4u32, v4u32); +v2u64 __builtin_msa_ave_u_d (v2u64, v2u64); + +v16i8 __builtin_msa_aver_s_b (v16i8, v16i8); +v8i16 __builtin_msa_aver_s_h (v8i16, v8i16); +v4i32 __builtin_msa_aver_s_w (v4i32, v4i32); +v2i64 __builtin_msa_aver_s_d (v2i64, v2i64); + +v16u8 __builtin_msa_aver_u_b (v16u8, v16u8); +v8u16 __builtin_msa_aver_u_h 
(v8u16, v8u16); +v4u32 __builtin_msa_aver_u_w (v4u32, v4u32); +v2u64 __builtin_msa_aver_u_d (v2u64, v2u64); + +v16u8 __builtin_msa_bclr_b (v16u8, v16u8); +v8u16 __builtin_msa_bclr_h (v8u16, v8u16); +v4u32 __builtin_msa_bclr_w (v4u32, v4u32); +v2u64 __builtin_msa_bclr_d (v2u64, v2u64); + +v16u8 __builtin_msa_bclri_b (v16u8, imm0_7); +v8u16 __builtin_msa_bclri_h (v8u16, imm0_15); +v4u32 __builtin_msa_bclri_w (v4u32, imm0_31); +v2u64 __builtin_msa_bclri_d (v2u64, imm0_63); + +v16u8 __builtin_msa_binsl_b (v16u8, v16u8, v16u8); +v8u16 __builtin_msa_binsl_h (v8u16, v8u16, v8u16); +v4u32 __builtin_msa_binsl_w (v4u32, v4u32, v4u32); +v2u64 __builtin_msa_binsl_d (v2u64, v2u64, v2u64); + +v16u8 __builtin_msa_binsli_b (v16u8, v16u8, imm0_7); +v8u16 __builtin_msa_binsli_h (v8u16, v8u16, imm0_15); +v4u32 __builtin_msa_binsli_w (v4u32, v4u32, imm0_31); +v2u64 __builtin_msa_binsli_d (v2u64, v2u64, imm0_63); + +v16u8 __builtin_msa_binsr_b (v16u8, v16u8, v16u8); +v8u16 __builtin_msa_binsr_h (v8u16, v8u16, v8u16); +v4u32 __builtin_msa_binsr_w (v4u32, v4u32, v4u32); +v2u64 __builtin_msa_binsr_d (v2u64, v2u64, v2u64); + +v16u8 __builtin_msa_binsri_b (v16u8, v16u8, imm0_7); +v8u16 __builtin_msa_binsri_h (v8u16, v8u16, imm0_15); +v4u32 __builtin_msa_binsri_w (v4u32, v4u32, imm0_31); +v2u64 __builtin_msa_binsri_d (v2u64, v2u64, imm0_63); + +v16u8 __builtin_msa_bmnz_v (v16u8, v16u8, v16u8); + +v16u8 __builtin_msa_bmnzi_b (v16u8, v16u8, imm0_255); + +v16u8 __builtin_msa_bmz_v (v16u8, v16u8, v16u8); + +v16u8 __builtin_msa_bmzi_b (v16u8, v16u8, imm0_255); + +v16u8 __builtin_msa_bneg_b (v16u8, v16u8); +v8u16 __builtin_msa_bneg_h (v8u16, v8u16); +v4u32 __builtin_msa_bneg_w (v4u32, v4u32); +v2u64 __builtin_msa_bneg_d (v2u64, v2u64); + +v16u8 __builtin_msa_bnegi_b (v16u8, imm0_7); +v8u16 __builtin_msa_bnegi_h (v8u16, imm0_15); +v4u32 __builtin_msa_bnegi_w (v4u32, imm0_31); +v2u64 __builtin_msa_bnegi_d (v2u64, imm0_63); + +i32 __builtin_msa_bnz_b (v16u8); +i32 __builtin_msa_bnz_h (v8u16); +i32 __builtin_msa_bnz_w (v4u32); +i32 __builtin_msa_bnz_d (v2u64); + +i32 __builtin_msa_bnz_v (v16u8); + +v16u8 __builtin_msa_bsel_v (v16u8, v16u8, v16u8); + +v16u8 __builtin_msa_bseli_b (v16u8, v16u8, imm0_255); + +v16u8 __builtin_msa_bset_b (v16u8, v16u8); +v8u16 __builtin_msa_bset_h (v8u16, v8u16); +v4u32 __builtin_msa_bset_w (v4u32, v4u32); +v2u64 __builtin_msa_bset_d (v2u64, v2u64); + +v16u8 __builtin_msa_bseti_b (v16u8, imm0_7); +v8u16 __builtin_msa_bseti_h (v8u16, imm0_15); +v4u32 __builtin_msa_bseti_w (v4u32, imm0_31); +v2u64 __builtin_msa_bseti_d (v2u64, imm0_63); + +i32 __builtin_msa_bz_b (v16u8); +i32 __builtin_msa_bz_h (v8u16); +i32 __builtin_msa_bz_w (v4u32); +i32 __builtin_msa_bz_d (v2u64); + +i32 __builtin_msa_bz_v (v16u8); + +v16i8 __builtin_msa_ceq_b (v16i8, v16i8); +v8i16 __builtin_msa_ceq_h (v8i16, v8i16); +v4i32 __builtin_msa_ceq_w (v4i32, v4i32); +v2i64 __builtin_msa_ceq_d (v2i64, v2i64); + +v16i8 __builtin_msa_ceqi_b (v16i8, imm_n16_15); +v8i16 __builtin_msa_ceqi_h (v8i16, imm_n16_15); +v4i32 __builtin_msa_ceqi_w (v4i32, imm_n16_15); +v2i64 __builtin_msa_ceqi_d (v2i64, imm_n16_15); + +i32 __builtin_msa_cfcmsa (imm0_31); + +v16i8 __builtin_msa_cle_s_b (v16i8, v16i8); +v8i16 __builtin_msa_cle_s_h (v8i16, v8i16); +v4i32 __builtin_msa_cle_s_w (v4i32, v4i32); +v2i64 __builtin_msa_cle_s_d (v2i64, v2i64); + +v16i8 __builtin_msa_cle_u_b (v16u8, v16u8); +v8i16 __builtin_msa_cle_u_h (v8u16, v8u16); +v4i32 __builtin_msa_cle_u_w (v4u32, v4u32); +v2i64 __builtin_msa_cle_u_d (v2u64, v2u64); + +v16i8 __builtin_msa_clei_s_b 
(v16i8, imm_n16_15); +v8i16 __builtin_msa_clei_s_h (v8i16, imm_n16_15); +v4i32 __builtin_msa_clei_s_w (v4i32, imm_n16_15); +v2i64 __builtin_msa_clei_s_d (v2i64, imm_n16_15); + +v16i8 __builtin_msa_clei_u_b (v16u8, imm0_31); +v8i16 __builtin_msa_clei_u_h (v8u16, imm0_31); +v4i32 __builtin_msa_clei_u_w (v4u32, imm0_31); +v2i64 __builtin_msa_clei_u_d (v2u64, imm0_31); + +v16i8 __builtin_msa_clt_s_b (v16i8, v16i8); +v8i16 __builtin_msa_clt_s_h (v8i16, v8i16); +v4i32 __builtin_msa_clt_s_w (v4i32, v4i32); +v2i64 __builtin_msa_clt_s_d (v2i64, v2i64); + +v16i8 __builtin_msa_clt_u_b (v16u8, v16u8); +v8i16 __builtin_msa_clt_u_h (v8u16, v8u16); +v4i32 __builtin_msa_clt_u_w (v4u32, v4u32); +v2i64 __builtin_msa_clt_u_d (v2u64, v2u64); + +v16i8 __builtin_msa_clti_s_b (v16i8, imm_n16_15); +v8i16 __builtin_msa_clti_s_h (v8i16, imm_n16_15); +v4i32 __builtin_msa_clti_s_w (v4i32, imm_n16_15); +v2i64 __builtin_msa_clti_s_d (v2i64, imm_n16_15); + +v16i8 __builtin_msa_clti_u_b (v16u8, imm0_31); +v8i16 __builtin_msa_clti_u_h (v8u16, imm0_31); +v4i32 __builtin_msa_clti_u_w (v4u32, imm0_31); +v2i64 __builtin_msa_clti_u_d (v2u64, imm0_31); + +i32 __builtin_msa_copy_s_b (v16i8, imm0_15); +i32 __builtin_msa_copy_s_h (v8i16, imm0_7); +i32 __builtin_msa_copy_s_w (v4i32, imm0_3); +i64 __builtin_msa_copy_s_d (v2i64, imm0_1); + +u32 __builtin_msa_copy_u_b (v16i8, imm0_15); +u32 __builtin_msa_copy_u_h (v8i16, imm0_7); +u32 __builtin_msa_copy_u_w (v4i32, imm0_3); +u64 __builtin_msa_copy_u_d (v2i64, imm0_1); + +void __builtin_msa_ctcmsa (imm0_31, i32); + +v16i8 __builtin_msa_div_s_b (v16i8, v16i8); +v8i16 __builtin_msa_div_s_h (v8i16, v8i16); +v4i32 __builtin_msa_div_s_w (v4i32, v4i32); +v2i64 __builtin_msa_div_s_d (v2i64, v2i64); + +v16u8 __builtin_msa_div_u_b (v16u8, v16u8); +v8u16 __builtin_msa_div_u_h (v8u16, v8u16); +v4u32 __builtin_msa_div_u_w (v4u32, v4u32); +v2u64 __builtin_msa_div_u_d (v2u64, v2u64); + +v8i16 __builtin_msa_dotp_s_h (v16i8, v16i8); +v4i32 __builtin_msa_dotp_s_w (v8i16, v8i16); +v2i64 __builtin_msa_dotp_s_d (v4i32, v4i32); + +v8u16 __builtin_msa_dotp_u_h (v16u8, v16u8); +v4u32 __builtin_msa_dotp_u_w (v8u16, v8u16); +v2u64 __builtin_msa_dotp_u_d (v4u32, v4u32); + +v8i16 __builtin_msa_dpadd_s_h (v8i16, v16i8, v16i8); +v4i32 __builtin_msa_dpadd_s_w (v4i32, v8i16, v8i16); +v2i64 __builtin_msa_dpadd_s_d (v2i64, v4i32, v4i32); + +v8u16 __builtin_msa_dpadd_u_h (v8u16, v16u8, v16u8); +v4u32 __builtin_msa_dpadd_u_w (v4u32, v8u16, v8u16); +v2u64 __builtin_msa_dpadd_u_d (v2u64, v4u32, v4u32); + +v8i16 __builtin_msa_dpsub_s_h (v8i16, v16i8, v16i8); +v4i32 __builtin_msa_dpsub_s_w (v4i32, v8i16, v8i16); +v2i64 __builtin_msa_dpsub_s_d (v2i64, v4i32, v4i32); + +v8i16 __builtin_msa_dpsub_u_h (v8i16, v16u8, v16u8); +v4i32 __builtin_msa_dpsub_u_w (v4i32, v8u16, v8u16); +v2i64 __builtin_msa_dpsub_u_d (v2i64, v4u32, v4u32); + +v4f32 __builtin_msa_fadd_w (v4f32, v4f32); +v2f64 __builtin_msa_fadd_d (v2f64, v2f64); + +v4i32 __builtin_msa_fcaf_w (v4f32, v4f32); +v2i64 __builtin_msa_fcaf_d (v2f64, v2f64); + +v4i32 __builtin_msa_fceq_w (v4f32, v4f32); +v2i64 __builtin_msa_fceq_d (v2f64, v2f64); + +v4i32 __builtin_msa_fclass_w (v4f32); +v2i64 __builtin_msa_fclass_d (v2f64); + +v4i32 __builtin_msa_fcle_w (v4f32, v4f32); +v2i64 __builtin_msa_fcle_d (v2f64, v2f64); + +v4i32 __builtin_msa_fclt_w (v4f32, v4f32); +v2i64 __builtin_msa_fclt_d (v2f64, v2f64); + +v4i32 __builtin_msa_fcne_w (v4f32, v4f32); +v2i64 __builtin_msa_fcne_d (v2f64, v2f64); + +v4i32 __builtin_msa_fcor_w (v4f32, v4f32); +v2i64 __builtin_msa_fcor_d (v2f64, v2f64); + 
+v4i32 __builtin_msa_fcueq_w (v4f32, v4f32); +v2i64 __builtin_msa_fcueq_d (v2f64, v2f64); + +v4i32 __builtin_msa_fcule_w (v4f32, v4f32); +v2i64 __builtin_msa_fcule_d (v2f64, v2f64); + +v4i32 __builtin_msa_fcult_w (v4f32, v4f32); +v2i64 __builtin_msa_fcult_d (v2f64, v2f64); + +v4i32 __builtin_msa_fcun_w (v4f32, v4f32); +v2i64 __builtin_msa_fcun_d (v2f64, v2f64); + +v4i32 __builtin_msa_fcune_w (v4f32, v4f32); +v2i64 __builtin_msa_fcune_d (v2f64, v2f64); + +v4f32 __builtin_msa_fdiv_w (v4f32, v4f32); +v2f64 __builtin_msa_fdiv_d (v2f64, v2f64); + +v8i16 __builtin_msa_fexdo_h (v4f32, v4f32); +v4f32 __builtin_msa_fexdo_w (v2f64, v2f64); + +v4f32 __builtin_msa_fexp2_w (v4f32, v4i32); +v2f64 __builtin_msa_fexp2_d (v2f64, v2i64); + +v4f32 __builtin_msa_fexupl_w (v8i16); +v2f64 __builtin_msa_fexupl_d (v4f32); + +v4f32 __builtin_msa_fexupr_w (v8i16); +v2f64 __builtin_msa_fexupr_d (v4f32); + +v4f32 __builtin_msa_ffint_s_w (v4i32); +v2f64 __builtin_msa_ffint_s_d (v2i64); + +v4f32 __builtin_msa_ffint_u_w (v4u32); +v2f64 __builtin_msa_ffint_u_d (v2u64); + +v4f32 __builtin_msa_ffql_w (v8i16); +v2f64 __builtin_msa_ffql_d (v4i32); + +v4f32 __builtin_msa_ffqr_w (v8i16); +v2f64 __builtin_msa_ffqr_d (v4i32); + +v16i8 __builtin_msa_fill_b (i32); +v8i16 __builtin_msa_fill_h (i32); +v4i32 __builtin_msa_fill_w (i32); +v2i64 __builtin_msa_fill_d (i64); + +v4f32 __builtin_msa_flog2_w (v4f32); +v2f64 __builtin_msa_flog2_d (v2f64); + +v4f32 __builtin_msa_fmadd_w (v4f32, v4f32, v4f32); +v2f64 __builtin_msa_fmadd_d (v2f64, v2f64, v2f64); + +v4f32 __builtin_msa_fmax_w (v4f32, v4f32); +v2f64 __builtin_msa_fmax_d (v2f64, v2f64); + +v4f32 __builtin_msa_fmax_a_w (v4f32, v4f32); +v2f64 __builtin_msa_fmax_a_d (v2f64, v2f64); + +v4f32 __builtin_msa_fmin_w (v4f32, v4f32); +v2f64 __builtin_msa_fmin_d (v2f64, v2f64); + +v4f32 __builtin_msa_fmin_a_w (v4f32, v4f32); +v2f64 __builtin_msa_fmin_a_d (v2f64, v2f64); + +v4f32 __builtin_msa_fmsub_w (v4f32, v4f32, v4f32); +v2f64 __builtin_msa_fmsub_d (v2f64, v2f64, v2f64); + +v4f32 __builtin_msa_fmul_w (v4f32, v4f32); +v2f64 __builtin_msa_fmul_d (v2f64, v2f64); + +v4f32 __builtin_msa_frint_w (v4f32); +v2f64 __builtin_msa_frint_d (v2f64); + +v4f32 __builtin_msa_frcp_w (v4f32); +v2f64 __builtin_msa_frcp_d (v2f64); + +v4f32 __builtin_msa_frsqrt_w (v4f32); +v2f64 __builtin_msa_frsqrt_d (v2f64); + +v4i32 __builtin_msa_fsaf_w (v4f32, v4f32); +v2i64 __builtin_msa_fsaf_d (v2f64, v2f64); + +v4i32 __builtin_msa_fseq_w (v4f32, v4f32); +v2i64 __builtin_msa_fseq_d (v2f64, v2f64); + +v4i32 __builtin_msa_fsle_w (v4f32, v4f32); +v2i64 __builtin_msa_fsle_d (v2f64, v2f64); + +v4i32 __builtin_msa_fslt_w (v4f32, v4f32); +v2i64 __builtin_msa_fslt_d (v2f64, v2f64); + +v4i32 __builtin_msa_fsne_w (v4f32, v4f32); +v2i64 __builtin_msa_fsne_d (v2f64, v2f64); + +v4i32 __builtin_msa_fsor_w (v4f32, v4f32); +v2i64 __builtin_msa_fsor_d (v2f64, v2f64); + +v4f32 __builtin_msa_fsqrt_w (v4f32); +v2f64 __builtin_msa_fsqrt_d (v2f64); + +v4f32 __builtin_msa_fsub_w (v4f32, v4f32); +v2f64 __builtin_msa_fsub_d (v2f64, v2f64); + +v4i32 __builtin_msa_fsueq_w (v4f32, v4f32); +v2i64 __builtin_msa_fsueq_d (v2f64, v2f64); + +v4i32 __builtin_msa_fsule_w (v4f32, v4f32); +v2i64 __builtin_msa_fsule_d (v2f64, v2f64); + +v4i32 __builtin_msa_fsult_w (v4f32, v4f32); +v2i64 __builtin_msa_fsult_d (v2f64, v2f64); + +v4i32 __builtin_msa_fsun_w (v4f32, v4f32); +v2i64 __builtin_msa_fsun_d (v2f64, v2f64); + +v4i32 __builtin_msa_fsune_w (v4f32, v4f32); +v2i64 __builtin_msa_fsune_d (v2f64, v2f64); + +v4i32 __builtin_msa_ftint_s_w (v4f32); +v2i64 
__builtin_msa_ftint_s_d (v2f64); + +v4u32 __builtin_msa_ftint_u_w (v4f32); +v2u64 __builtin_msa_ftint_u_d (v2f64); + +v8i16 __builtin_msa_ftq_h (v4f32, v4f32); +v4i32 __builtin_msa_ftq_w (v2f64, v2f64); + +v4i32 __builtin_msa_ftrunc_s_w (v4f32); +v2i64 __builtin_msa_ftrunc_s_d (v2f64); + +v4u32 __builtin_msa_ftrunc_u_w (v4f32); +v2u64 __builtin_msa_ftrunc_u_d (v2f64); + +v8i16 __builtin_msa_hadd_s_h (v16i8, v16i8); +v4i32 __builtin_msa_hadd_s_w (v8i16, v8i16); +v2i64 __builtin_msa_hadd_s_d (v4i32, v4i32); + +v8u16 __builtin_msa_hadd_u_h (v16u8, v16u8); +v4u32 __builtin_msa_hadd_u_w (v8u16, v8u16); +v2u64 __builtin_msa_hadd_u_d (v4u32, v4u32); + +v8i16 __builtin_msa_hsub_s_h (v16i8, v16i8); +v4i32 __builtin_msa_hsub_s_w (v8i16, v8i16); +v2i64 __builtin_msa_hsub_s_d (v4i32, v4i32); + +v8i16 __builtin_msa_hsub_u_h (v16u8, v16u8); +v4i32 __builtin_msa_hsub_u_w (v8u16, v8u16); +v2i64 __builtin_msa_hsub_u_d (v4u32, v4u32); + +v16i8 __builtin_msa_ilvev_b (v16i8, v16i8); +v8i16 __builtin_msa_ilvev_h (v8i16, v8i16); +v4i32 __builtin_msa_ilvev_w (v4i32, v4i32); +v2i64 __builtin_msa_ilvev_d (v2i64, v2i64); + +v16i8 __builtin_msa_ilvl_b (v16i8, v16i8); +v8i16 __builtin_msa_ilvl_h (v8i16, v8i16); +v4i32 __builtin_msa_ilvl_w (v4i32, v4i32); +v2i64 __builtin_msa_ilvl_d (v2i64, v2i64); + +v16i8 __builtin_msa_ilvod_b (v16i8, v16i8); +v8i16 __builtin_msa_ilvod_h (v8i16, v8i16); +v4i32 __builtin_msa_ilvod_w (v4i32, v4i32); +v2i64 __builtin_msa_ilvod_d (v2i64, v2i64); + +v16i8 __builtin_msa_ilvr_b (v16i8, v16i8); +v8i16 __builtin_msa_ilvr_h (v8i16, v8i16); +v4i32 __builtin_msa_ilvr_w (v4i32, v4i32); +v2i64 __builtin_msa_ilvr_d (v2i64, v2i64); + +v16i8 __builtin_msa_insert_b (v16i8, imm0_15, i32); +v8i16 __builtin_msa_insert_h (v8i16, imm0_7, i32); +v4i32 __builtin_msa_insert_w (v4i32, imm0_3, i32); +v2i64 __builtin_msa_insert_d (v2i64, imm0_1, i64); + +v16i8 __builtin_msa_insve_b (v16i8, imm0_15, v16i8); +v8i16 __builtin_msa_insve_h (v8i16, imm0_7, v8i16); +v4i32 __builtin_msa_insve_w (v4i32, imm0_3, v4i32); +v2i64 __builtin_msa_insve_d (v2i64, imm0_1, v2i64); + +v16i8 __builtin_msa_ld_b (void *, imm_n512_511); +v8i16 __builtin_msa_ld_h (void *, imm_n1024_1022); +v4i32 __builtin_msa_ld_w (void *, imm_n2048_2044); +v2i64 __builtin_msa_ld_d (void *, imm_n4096_4088); + +v16i8 __builtin_msa_ldi_b (imm_n512_511); +v8i16 __builtin_msa_ldi_h (imm_n512_511); +v4i32 __builtin_msa_ldi_w (imm_n512_511); +v2i64 __builtin_msa_ldi_d (imm_n512_511); + +v8i16 __builtin_msa_madd_q_h (v8i16, v8i16, v8i16); +v4i32 __builtin_msa_madd_q_w (v4i32, v4i32, v4i32); + +v8i16 __builtin_msa_maddr_q_h (v8i16, v8i16, v8i16); +v4i32 __builtin_msa_maddr_q_w (v4i32, v4i32, v4i32); + +v16i8 __builtin_msa_maddv_b (v16i8, v16i8, v16i8); +v8i16 __builtin_msa_maddv_h (v8i16, v8i16, v8i16); +v4i32 __builtin_msa_maddv_w (v4i32, v4i32, v4i32); +v2i64 __builtin_msa_maddv_d (v2i64, v2i64, v2i64); + +v16i8 __builtin_msa_max_a_b (v16i8, v16i8); +v8i16 __builtin_msa_max_a_h (v8i16, v8i16); +v4i32 __builtin_msa_max_a_w (v4i32, v4i32); +v2i64 __builtin_msa_max_a_d (v2i64, v2i64); + +v16i8 __builtin_msa_max_s_b (v16i8, v16i8); +v8i16 __builtin_msa_max_s_h (v8i16, v8i16); +v4i32 __builtin_msa_max_s_w (v4i32, v4i32); +v2i64 __builtin_msa_max_s_d (v2i64, v2i64); + +v16u8 __builtin_msa_max_u_b (v16u8, v16u8); +v8u16 __builtin_msa_max_u_h (v8u16, v8u16); +v4u32 __builtin_msa_max_u_w (v4u32, v4u32); +v2u64 __builtin_msa_max_u_d (v2u64, v2u64); + +v16i8 __builtin_msa_maxi_s_b (v16i8, imm_n16_15); +v8i16 __builtin_msa_maxi_s_h (v8i16, imm_n16_15); +v4i32 
__builtin_msa_maxi_s_w (v4i32, imm_n16_15); +v2i64 __builtin_msa_maxi_s_d (v2i64, imm_n16_15); + +v16u8 __builtin_msa_maxi_u_b (v16u8, imm0_31); +v8u16 __builtin_msa_maxi_u_h (v8u16, imm0_31); +v4u32 __builtin_msa_maxi_u_w (v4u32, imm0_31); +v2u64 __builtin_msa_maxi_u_d (v2u64, imm0_31); + +v16i8 __builtin_msa_min_a_b (v16i8, v16i8); +v8i16 __builtin_msa_min_a_h (v8i16, v8i16); +v4i32 __builtin_msa_min_a_w (v4i32, v4i32); +v2i64 __builtin_msa_min_a_d (v2i64, v2i64); + +v16i8 __builtin_msa_min_s_b (v16i8, v16i8); +v8i16 __builtin_msa_min_s_h (v8i16, v8i16); +v4i32 __builtin_msa_min_s_w (v4i32, v4i32); +v2i64 __builtin_msa_min_s_d (v2i64, v2i64); + +v16u8 __builtin_msa_min_u_b (v16u8, v16u8); +v8u16 __builtin_msa_min_u_h (v8u16, v8u16); +v4u32 __builtin_msa_min_u_w (v4u32, v4u32); +v2u64 __builtin_msa_min_u_d (v2u64, v2u64); + +v16i8 __builtin_msa_mini_s_b (v16i8, imm_n16_15); +v8i16 __builtin_msa_mini_s_h (v8i16, imm_n16_15); +v4i32 __builtin_msa_mini_s_w (v4i32, imm_n16_15); +v2i64 __builtin_msa_mini_s_d (v2i64, imm_n16_15); + +v16u8 __builtin_msa_mini_u_b (v16u8, imm0_31); +v8u16 __builtin_msa_mini_u_h (v8u16, imm0_31); +v4u32 __builtin_msa_mini_u_w (v4u32, imm0_31); +v2u64 __builtin_msa_mini_u_d (v2u64, imm0_31); + +v16i8 __builtin_msa_mod_s_b (v16i8, v16i8); +v8i16 __builtin_msa_mod_s_h (v8i16, v8i16); +v4i32 __builtin_msa_mod_s_w (v4i32, v4i32); +v2i64 __builtin_msa_mod_s_d (v2i64, v2i64); + +v16u8 __builtin_msa_mod_u_b (v16u8, v16u8); +v8u16 __builtin_msa_mod_u_h (v8u16, v8u16); +v4u32 __builtin_msa_mod_u_w (v4u32, v4u32); +v2u64 __builtin_msa_mod_u_d (v2u64, v2u64); + +v16i8 __builtin_msa_move_v (v16i8); + +v8i16 __builtin_msa_msub_q_h (v8i16, v8i16, v8i16); +v4i32 __builtin_msa_msub_q_w (v4i32, v4i32, v4i32); + +v8i16 __builtin_msa_msubr_q_h (v8i16, v8i16, v8i16); +v4i32 __builtin_msa_msubr_q_w (v4i32, v4i32, v4i32); + +v16i8 __builtin_msa_msubv_b (v16i8, v16i8, v16i8); +v8i16 __builtin_msa_msubv_h (v8i16, v8i16, v8i16); +v4i32 __builtin_msa_msubv_w (v4i32, v4i32, v4i32); +v2i64 __builtin_msa_msubv_d (v2i64, v2i64, v2i64); + +v8i16 __builtin_msa_mul_q_h (v8i16, v8i16); +v4i32 __builtin_msa_mul_q_w (v4i32, v4i32); + +v8i16 __builtin_msa_mulr_q_h (v8i16, v8i16); +v4i32 __builtin_msa_mulr_q_w (v4i32, v4i32); + +v16i8 __builtin_msa_mulv_b (v16i8, v16i8); +v8i16 __builtin_msa_mulv_h (v8i16, v8i16); +v4i32 __builtin_msa_mulv_w (v4i32, v4i32); +v2i64 __builtin_msa_mulv_d (v2i64, v2i64); + +v16i8 __builtin_msa_nloc_b (v16i8); +v8i16 __builtin_msa_nloc_h (v8i16); +v4i32 __builtin_msa_nloc_w (v4i32); +v2i64 __builtin_msa_nloc_d (v2i64); + +v16i8 __builtin_msa_nlzc_b (v16i8); +v8i16 __builtin_msa_nlzc_h (v8i16); +v4i32 __builtin_msa_nlzc_w (v4i32); +v2i64 __builtin_msa_nlzc_d (v2i64); + +v16u8 __builtin_msa_nor_v (v16u8, v16u8); + +v16u8 __builtin_msa_nori_b (v16u8, imm0_255); + +v16u8 __builtin_msa_or_v (v16u8, v16u8); + +v16u8 __builtin_msa_ori_b (v16u8, imm0_255); + +v16i8 __builtin_msa_pckev_b (v16i8, v16i8); +v8i16 __builtin_msa_pckev_h (v8i16, v8i16); +v4i32 __builtin_msa_pckev_w (v4i32, v4i32); +v2i64 __builtin_msa_pckev_d (v2i64, v2i64); + +v16i8 __builtin_msa_pckod_b (v16i8, v16i8); +v8i16 __builtin_msa_pckod_h (v8i16, v8i16); +v4i32 __builtin_msa_pckod_w (v4i32, v4i32); +v2i64 __builtin_msa_pckod_d (v2i64, v2i64); + +v16i8 __builtin_msa_pcnt_b (v16i8); +v8i16 __builtin_msa_pcnt_h (v8i16); +v4i32 __builtin_msa_pcnt_w (v4i32); +v2i64 __builtin_msa_pcnt_d (v2i64); + +v16i8 __builtin_msa_sat_s_b (v16i8, imm0_7); +v8i16 __builtin_msa_sat_s_h (v8i16, imm0_15); +v4i32 __builtin_msa_sat_s_w 
(v4i32, imm0_31); +v2i64 __builtin_msa_sat_s_d (v2i64, imm0_63); + +v16u8 __builtin_msa_sat_u_b (v16u8, imm0_7); +v8u16 __builtin_msa_sat_u_h (v8u16, imm0_15); +v4u32 __builtin_msa_sat_u_w (v4u32, imm0_31); +v2u64 __builtin_msa_sat_u_d (v2u64, imm0_63); + +v16i8 __builtin_msa_shf_b (v16i8, imm0_255); +v8i16 __builtin_msa_shf_h (v8i16, imm0_255); +v4i32 __builtin_msa_shf_w (v4i32, imm0_255); + +v16i8 __builtin_msa_sld_b (v16i8, v16i8, i32); +v8i16 __builtin_msa_sld_h (v8i16, v8i16, i32); +v4i32 __builtin_msa_sld_w (v4i32, v4i32, i32); +v2i64 __builtin_msa_sld_d (v2i64, v2i64, i32); + +v16i8 __builtin_msa_sldi_b (v16i8, v16i8, imm0_15); +v8i16 __builtin_msa_sldi_h (v8i16, v8i16, imm0_7); +v4i32 __builtin_msa_sldi_w (v4i32, v4i32, imm0_3); +v2i64 __builtin_msa_sldi_d (v2i64, v2i64, imm0_1); + +v16i8 __builtin_msa_sll_b (v16i8, v16i8); +v8i16 __builtin_msa_sll_h (v8i16, v8i16); +v4i32 __builtin_msa_sll_w (v4i32, v4i32); +v2i64 __builtin_msa_sll_d (v2i64, v2i64); + +v16i8 __builtin_msa_slli_b (v16i8, imm0_7); +v8i16 __builtin_msa_slli_h (v8i16, imm0_15); +v4i32 __builtin_msa_slli_w (v4i32, imm0_31); +v2i64 __builtin_msa_slli_d (v2i64, imm0_63); + +v16i8 __builtin_msa_splat_b (v16i8, i32); +v8i16 __builtin_msa_splat_h (v8i16, i32); +v4i32 __builtin_msa_splat_w (v4i32, i32); +v2i64 __builtin_msa_splat_d (v2i64, i32); + +v16i8 __builtin_msa_splati_b (v16i8, imm0_15); +v8i16 __builtin_msa_splati_h (v8i16, imm0_7); +v4i32 __builtin_msa_splati_w (v4i32, imm0_3); +v2i64 __builtin_msa_splati_d (v2i64, imm0_1); + +v16i8 __builtin_msa_sra_b (v16i8, v16i8); +v8i16 __builtin_msa_sra_h (v8i16, v8i16); +v4i32 __builtin_msa_sra_w (v4i32, v4i32); +v2i64 __builtin_msa_sra_d (v2i64, v2i64); + +v16i8 __builtin_msa_srai_b (v16i8, imm0_7); +v8i16 __builtin_msa_srai_h (v8i16, imm0_15); +v4i32 __builtin_msa_srai_w (v4i32, imm0_31); +v2i64 __builtin_msa_srai_d (v2i64, imm0_63); + +v16i8 __builtin_msa_srar_b (v16i8, v16i8); +v8i16 __builtin_msa_srar_h (v8i16, v8i16); +v4i32 __builtin_msa_srar_w (v4i32, v4i32); +v2i64 __builtin_msa_srar_d (v2i64, v2i64); + +v16i8 __builtin_msa_srari_b (v16i8, imm0_7); +v8i16 __builtin_msa_srari_h (v8i16, imm0_15); +v4i32 __builtin_msa_srari_w (v4i32, imm0_31); +v2i64 __builtin_msa_srari_d (v2i64, imm0_63); + +v16i8 __builtin_msa_srl_b (v16i8, v16i8); +v8i16 __builtin_msa_srl_h (v8i16, v8i16); +v4i32 __builtin_msa_srl_w (v4i32, v4i32); +v2i64 __builtin_msa_srl_d (v2i64, v2i64); + +v16i8 __builtin_msa_srli_b (v16i8, imm0_7); +v8i16 __builtin_msa_srli_h (v8i16, imm0_15); +v4i32 __builtin_msa_srli_w (v4i32, imm0_31); +v2i64 __builtin_msa_srli_d (v2i64, imm0_63); + +v16i8 __builtin_msa_srlr_b (v16i8, v16i8); +v8i16 __builtin_msa_srlr_h (v8i16, v8i16); +v4i32 __builtin_msa_srlr_w (v4i32, v4i32); +v2i64 __builtin_msa_srlr_d (v2i64, v2i64); + +v16i8 __builtin_msa_srlri_b (v16i8, imm0_7); +v8i16 __builtin_msa_srlri_h (v8i16, imm0_15); +v4i32 __builtin_msa_srlri_w (v4i32, imm0_31); +v2i64 __builtin_msa_srlri_d (v2i64, imm0_63); + +void __builtin_msa_st_b (v16i8, void *, imm_n512_511); +void __builtin_msa_st_h (v8i16, void *, imm_n1024_1022); +void __builtin_msa_st_w (v4i32, void *, imm_n2048_2044); +void __builtin_msa_st_d (v2i64, void *, imm_n4096_4088); + +v16i8 __builtin_msa_subs_s_b (v16i8, v16i8); +v8i16 __builtin_msa_subs_s_h (v8i16, v8i16); +v4i32 __builtin_msa_subs_s_w (v4i32, v4i32); +v2i64 __builtin_msa_subs_s_d (v2i64, v2i64); + +v16u8 __builtin_msa_subs_u_b (v16u8, v16u8); +v8u16 __builtin_msa_subs_u_h (v8u16, v8u16); +v4u32 __builtin_msa_subs_u_w (v4u32, v4u32); +v2u64 
__builtin_msa_subs_u_d (v2u64, v2u64); + +v16u8 __builtin_msa_subsus_u_b (v16u8, v16i8); +v8u16 __builtin_msa_subsus_u_h (v8u16, v8i16); +v4u32 __builtin_msa_subsus_u_w (v4u32, v4i32); +v2u64 __builtin_msa_subsus_u_d (v2u64, v2i64); + +v16i8 __builtin_msa_subsuu_s_b (v16u8, v16u8); +v8i16 __builtin_msa_subsuu_s_h (v8u16, v8u16); +v4i32 __builtin_msa_subsuu_s_w (v4u32, v4u32); +v2i64 __builtin_msa_subsuu_s_d (v2u64, v2u64); + +v16i8 __builtin_msa_subv_b (v16i8, v16i8); +v8i16 __builtin_msa_subv_h (v8i16, v8i16); +v4i32 __builtin_msa_subv_w (v4i32, v4i32); +v2i64 __builtin_msa_subv_d (v2i64, v2i64); + +v16i8 __builtin_msa_subvi_b (v16i8, imm0_31); +v8i16 __builtin_msa_subvi_h (v8i16, imm0_31); +v4i32 __builtin_msa_subvi_w (v4i32, imm0_31); +v2i64 __builtin_msa_subvi_d (v2i64, imm0_31); + +v16i8 __builtin_msa_vshf_b (v16i8, v16i8, v16i8); +v8i16 __builtin_msa_vshf_h (v8i16, v8i16, v8i16); +v4i32 __builtin_msa_vshf_w (v4i32, v4i32, v4i32); +v2i64 __builtin_msa_vshf_d (v2i64, v2i64, v2i64); + +v16u8 __builtin_msa_xor_v (v16u8, v16u8); + +v16u8 __builtin_msa_xori_b (v16u8, imm0_255); diff --git a/library/stdarch/crates/stdarch-verify/src/lib.rs b/library/stdarch/crates/stdarch-verify/src/lib.rs new file mode 100644 index 00000000000..cbe0530970b --- /dev/null +++ b/library/stdarch/crates/stdarch-verify/src/lib.rs @@ -0,0 +1,387 @@ +extern crate proc_macro; +extern crate proc_macro2; +#[macro_use] +extern crate quote; +#[macro_use] +extern crate syn; + +use proc_macro::TokenStream; +use proc_macro2::Span; +use std::{fs::File, io::Read, path::Path}; +use syn::ext::IdentExt; + +#[proc_macro] +pub fn x86_functions(input: TokenStream) -> TokenStream { + functions(input, &["core_arch/src/x86", "core_arch/src/x86_64"]) +} + +#[proc_macro] +pub fn arm_functions(input: TokenStream) -> TokenStream { + functions(input, &["core_arch/src/arm", "core_arch/src/aarch64"]) +} + +#[proc_macro] +pub fn mips_functions(input: TokenStream) -> TokenStream { + functions(input, &["core_arch/src/mips"]) +} + +fn functions(input: TokenStream, dirs: &[&str]) -> TokenStream { + let dir = Path::new(env!("CARGO_MANIFEST_DIR")); + let root = dir.parent().expect("root-dir not found"); + + let mut files = Vec::new(); + for dir in dirs { + walk(&root.join(dir), &mut files); + } + assert!(!files.is_empty()); + + let mut functions = Vec::new(); + for &mut (ref mut file, ref path) in &mut files { + for item in file.items.drain(..) { + if let syn::Item::Fn(f) = item { + functions.push((f, path)) + } + } + } + assert!(!functions.is_empty()); + + functions.retain(|&(ref f, _)| { + if let syn::Visibility::Public(_) = f.vis { + if f.unsafety.is_some() { + return true; + } + } + false + }); + assert!(!functions.is_empty()); + + let input = proc_macro2::TokenStream::from(input); + + let functions = functions + .iter() + .map(|&(ref f, path)| { + let name = &f.ident; + // println!("{}", name); + let mut arguments = Vec::new(); + for input in f.decl.inputs.iter() { + let ty = match *input { + syn::FnArg::Captured(ref c) => &c.ty, + _ => panic!("invalid argument on {}", name), + }; + arguments.push(to_type(ty)); + } + let ret = match f.decl.output { + syn::ReturnType::Default => quote! { None }, + syn::ReturnType::Type(_, ref t) => { + let ty = to_type(t); + quote! { Some(#ty) } + } + }; + let instrs = find_instrs(&f.attrs); + let target_feature = if let Some(i) = find_target_feature(&f.attrs) { + quote! { Some(#i) } + } else { + quote! { None } + }; + let required_const = find_required_const(&f.attrs); + quote! 
{ + Function { + name: stringify!(#name), + arguments: &[#(#arguments),*], + ret: #ret, + target_feature: #target_feature, + instrs: &[#(#instrs),*], + file: stringify!(#path), + required_const: &[#(#required_const),*], + } + } + }) + .collect::>(); + + let ret = quote! { #input: &[Function] = &[#(#functions),*]; }; + // println!("{}", ret); + ret.into() +} + +fn to_type(t: &syn::Type) -> proc_macro2::TokenStream { + match *t { + syn::Type::Path(ref p) => match extract_path_ident(&p.path).to_string().as_ref() { + // x86 ... + "__m128" => quote! { &M128 }, + "__m128d" => quote! { &M128D }, + "__m128i" => quote! { &M128I }, + "__m256" => quote! { &M256 }, + "__m256d" => quote! { &M256D }, + "__m256i" => quote! { &M256I }, + "__m512" => quote! { &M512 }, + "__m512d" => quote! { &M512D }, + "__m512i" => quote! { &M512I }, + "__mmask16" => quote! { &MMASK16 }, + "__m64" => quote! { &M64 }, + "bool" => quote! { &BOOL }, + "f32" => quote! { &F32 }, + "f64" => quote! { &F64 }, + "i16" => quote! { &I16 }, + "i32" => quote! { &I32 }, + "i64" => quote! { &I64 }, + "i8" => quote! { &I8 }, + "u16" => quote! { &U16 }, + "u32" => quote! { &U32 }, + "u64" => quote! { &U64 }, + "u128" => quote! { &U128 }, + "u8" => quote! { &U8 }, + "Ordering" => quote! { &ORDERING }, + "CpuidResult" => quote! { &CPUID }, + + // arm ... + "int8x4_t" => quote! { &I8X4 }, + "int8x8_t" => quote! { &I8X8 }, + "int8x8x2_t" => quote! { &I8X8X2 }, + "int8x8x3_t" => quote! { &I8X8X3 }, + "int8x8x4_t" => quote! { &I8X8X4 }, + "int8x16x2_t" => quote! { &I8X16X2 }, + "int8x16x3_t" => quote! { &I8X16X3 }, + "int8x16x4_t" => quote! { &I8X16X4 }, + "int8x16_t" => quote! { &I8X16 }, + "int16x2_t" => quote! { &I16X2 }, + "int16x4_t" => quote! { &I16X4 }, + "int16x8_t" => quote! { &I16X8 }, + "int32x2_t" => quote! { &I32X2 }, + "int32x4_t" => quote! { &I32X4 }, + "int64x1_t" => quote! { &I64X1 }, + "int64x2_t" => quote! { &I64X2 }, + "uint8x8_t" => quote! { &U8X8 }, + "uint8x8x2_t" => quote! { &U8X8X2 }, + "uint8x16x2_t" => quote! { &U8X16X2 }, + "uint8x16x3_t" => quote! { &U8X16X3 }, + "uint8x16x4_t" => quote! { &U8X16X4 }, + "uint8x8x3_t" => quote! { &U8X8X3 }, + "uint8x8x4_t" => quote! { &U8X8X4 }, + "uint8x16_t" => quote! { &U8X16 }, + "uint16x4_t" => quote! { &U16X4 }, + "uint16x8_t" => quote! { &U16X8 }, + "uint32x2_t" => quote! { &U32X2 }, + "uint32x4_t" => quote! { &U32X4 }, + "uint64x1_t" => quote! { &U64X1 }, + "uint64x2_t" => quote! { &U64X2 }, + "float32x2_t" => quote! { &F32X2 }, + "float32x4_t" => quote! { &F32X4 }, + "float64x1_t" => quote! { &F64X1 }, + "float64x2_t" => quote! { &F64X2 }, + "poly8x8_t" => quote! { &POLY8X8 }, + "poly8x8x2_t" => quote! { &POLY8X8X2 }, + "poly8x8x3_t" => quote! { &POLY8X8X3 }, + "poly8x8x4_t" => quote! { &POLY8X8X4 }, + "poly8x16x2_t" => quote! { &POLY8X16X2 }, + "poly8x16x3_t" => quote! { &POLY8X16X3 }, + "poly8x16x4_t" => quote! { &POLY8X16X4 }, + "poly64x1_t" => quote! { &POLY64X1 }, + "poly64x2_t" => quote! { &POLY64X2 }, + "poly8x16_t" => quote! { &POLY8X16 }, + "poly16x4_t" => quote! { &POLY16X4 }, + "poly16x8_t" => quote! { &POLY16X8 }, + + "v16i8" => quote! { &v16i8 }, + "v8i16" => quote! { &v8i16 }, + "v4i32" => quote! { &v4i32 }, + "v2i64" => quote! { &v2i64 }, + "v16u8" => quote! { &v16u8 }, + "v8u16" => quote! { &v8u16 }, + "v4u32" => quote! { &v4u32 }, + "v2u64" => quote! { &v2u64 }, + "v8f16" => quote! { &v8f16 }, + "v4f32" => quote! { &v4f32 }, + "v2f64" => quote! 
{ &v2f64 }, + + s => panic!("unspported type: \"{}\"", s), + }, + syn::Type::Ptr(syn::TypePtr { + ref elem, + ref mutability, + .. + }) + | syn::Type::Reference(syn::TypeReference { + ref elem, + ref mutability, + .. + }) => { + // Both pointers and references can have a mut token (*mut and &mut) + if mutability.is_some() { + let tokens = to_type(&elem); + quote! { &Type::MutPtr(#tokens) } + } else { + // If they don't (*const or &) then they are "const" + let tokens = to_type(&elem); + quote! { &Type::ConstPtr(#tokens) } + } + } + + syn::Type::Slice(_) => panic!("unsupported slice"), + syn::Type::Array(_) => panic!("unsupported array"), + syn::Type::Tuple(_) => quote! { &TUPLE }, + syn::Type::Never(_) => quote! { &NEVER }, + _ => panic!("unsupported type"), + } +} + +fn extract_path_ident(path: &syn::Path) -> syn::Ident { + if path.leading_colon.is_some() { + panic!("unsupported leading colon in path") + } + if path.segments.len() != 1 { + panic!("unsupported path that needs name resolution") + } + match path + .segments + .first() + .expect("segment not found") + .value() + .arguments + { + syn::PathArguments::None => {} + _ => panic!("unsupported path that has path arguments"), + } + path.segments + .first() + .expect("segment not found") + .value() + .ident + .clone() +} + +fn walk(root: &Path, files: &mut Vec<(syn::File, String)>) { + for file in root.read_dir().unwrap() { + let file = file.unwrap(); + if file.file_type().unwrap().is_dir() { + walk(&file.path(), files); + continue; + } + let path = file.path(); + if path.extension().and_then(std::ffi::OsStr::to_str) != Some("rs") { + continue; + } + + if path.file_name().and_then(std::ffi::OsStr::to_str) == Some("test.rs") { + continue; + } + + let mut contents = String::new(); + File::open(&path) + .unwrap_or_else(|_| panic!("can't open file at path: {}", path.display())) + .read_to_string(&mut contents) + .expect("failed to read file to string"); + + files.push(( + syn::parse_str::(&contents).expect("failed to parse"), + path.display().to_string(), + )); + } +} + +fn find_instrs(attrs: &[syn::Attribute]) -> Vec { + struct AssertInstr { + instr: String, + } + + // A small custom parser to parse out the instruction in `assert_instr`. + // + // TODO: should probably just reuse `Invoc` from the `assert-instr-macro` + // crate. 
+ impl syn::parse::Parse for AssertInstr { + fn parse(content: syn::parse::ParseStream) -> syn::parse::Result { + let input; + parenthesized!(input in content); + let _ = input.parse::()?; + let _ = input.parse::()?; + let ident = input.parse::()?; + if ident != "assert_instr" { + return Err(input.error("expected `assert_instr`")); + } + let instrs; + parenthesized!(instrs in input); + + let mut instr = String::new(); + while !instrs.is_empty() { + if let Ok(lit) = instrs.parse::() { + instr.push_str(&lit.value()); + } else if let Ok(ident) = instrs.call(syn::Ident::parse_any) { + instr.push_str(&ident.to_string()); + } else if instrs.parse::().is_ok() { + instr.push_str("."); + } else if instrs.parse::().is_ok() { + // consume everything remaining + drop(instrs.parse::()); + break; + } else { + return Err(input.error("failed to parse instruction")); + } + } + Ok(Self { instr }) + } + } + + attrs + .iter() + .filter(|a| a.path == syn::Ident::new("cfg_attr", Span::call_site()).into()) + .filter_map(|a| { + syn::parse2::(a.tts.clone()) + .ok() + .map(|a| a.instr) + }) + .collect() +} + +fn find_target_feature(attrs: &[syn::Attribute]) -> Option { + attrs + .iter() + .flat_map(|a| { + if let Some(a) = a.interpret_meta() { + if let syn::Meta::List(i) = a { + if i.ident == "target_feature" { + return i.nested; + } + } + } + syn::punctuated::Punctuated::new() + }) + .filter_map(|nested| match nested { + syn::NestedMeta::Meta(m) => Some(m), + syn::NestedMeta::Literal(_) => None, + }) + .find_map(|m| match m { + syn::Meta::NameValue(ref i) if i.ident == "enable" => Some(i.clone().lit), + _ => None, + }) +} + +fn find_required_const(attrs: &[syn::Attribute]) -> Vec { + attrs + .iter() + .flat_map(|a| { + if a.path.segments[0].ident == "rustc_args_required_const" { + syn::parse::(a.tts.clone().into()) + .unwrap() + .args + } else { + Vec::new() + } + }) + .collect() +} + +struct RustcArgsRequiredConst { + args: Vec, +} + +impl syn::parse::Parse for RustcArgsRequiredConst { + #[allow(clippy::cast_possible_truncation)] + fn parse(input: syn::parse::ParseStream) -> syn::parse::Result { + let content; + parenthesized!(content in input); + let list = + syn::punctuated::Punctuated::::parse_terminated(&content)?; + Ok(Self { + args: list.into_iter().map(|a| a.value() as usize).collect(), + }) + } +} diff --git a/library/stdarch/crates/stdarch-verify/tests/arm.rs b/library/stdarch/crates/stdarch-verify/tests/arm.rs new file mode 100644 index 00000000000..6f92933bc73 --- /dev/null +++ b/library/stdarch/crates/stdarch-verify/tests/arm.rs @@ -0,0 +1,592 @@ +#![allow(bad_style)] + +use std::{collections::HashMap, rc::Rc}; + +use html5ever::{ + driver::ParseOpts, + parse_document, + rcdom::{Node, NodeData, RcDom}, + tendril::TendrilSink, + tree_builder::TreeBuilderOpts, +}; + +struct Function { + name: &'static str, + arguments: &'static [&'static Type], + ret: Option<&'static Type>, + target_feature: Option<&'static str>, + instrs: &'static [&'static str], + file: &'static str, + required_const: &'static [usize], +} + +static F16: Type = Type::PrimFloat(16); +static F32: Type = Type::PrimFloat(32); +static F64: Type = Type::PrimFloat(64); +static I16: Type = Type::PrimSigned(16); +static I32: Type = Type::PrimSigned(32); +static I64: Type = Type::PrimSigned(64); +static I8: Type = Type::PrimSigned(8); +static U16: Type = Type::PrimUnsigned(16); +static U32: Type = Type::PrimUnsigned(32); +static U64: Type = Type::PrimUnsigned(64); +static U8: Type = Type::PrimUnsigned(8); +static NEVER: Type = Type::Never; + 
+static F16X4: Type = Type::F(16, 4, 1); +static F16X4X2: Type = Type::F(16, 4, 2); +static F16X4X3: Type = Type::F(16, 4, 3); +static F16X4X4: Type = Type::F(16, 4, 4); +static F16X8: Type = Type::F(16, 8, 1); +static F16X8X2: Type = Type::F(16, 8, 2); +static F16X8X3: Type = Type::F(16, 8, 3); +static F16X8X4: Type = Type::F(16, 8, 4); +static F32X2: Type = Type::F(32, 2, 1); +static F32X2X2: Type = Type::F(32, 2, 2); +static F32X2X3: Type = Type::F(32, 2, 3); +static F32X2X4: Type = Type::F(32, 2, 4); +static F32X4: Type = Type::F(32, 4, 1); +static F32X4X2: Type = Type::F(32, 4, 2); +static F32X4X3: Type = Type::F(32, 4, 3); +static F32X4X4: Type = Type::F(32, 4, 4); +static F64X1: Type = Type::F(64, 1, 1); +static F64X1X2: Type = Type::F(64, 1, 2); +static F64X1X3: Type = Type::F(64, 1, 3); +static F64X1X4: Type = Type::F(64, 1, 4); +static F64X2: Type = Type::F(64, 2, 1); +static F64X2X2: Type = Type::F(64, 2, 2); +static F64X2X3: Type = Type::F(64, 2, 3); +static F64X2X4: Type = Type::F(64, 2, 4); +static I16X2: Type = Type::I(16, 2, 1); +static I16X4: Type = Type::I(16, 4, 1); +static I16X4X2: Type = Type::I(16, 4, 2); +static I16X4X3: Type = Type::I(16, 4, 3); +static I16X4X4: Type = Type::I(16, 4, 4); +static I16X8: Type = Type::I(16, 8, 1); +static I16X8X2: Type = Type::I(16, 8, 2); +static I16X8X3: Type = Type::I(16, 8, 3); +static I16X8X4: Type = Type::I(16, 8, 4); +static I32X2: Type = Type::I(32, 2, 1); +static I32X2X2: Type = Type::I(32, 2, 2); +static I32X2X3: Type = Type::I(32, 2, 3); +static I32X2X4: Type = Type::I(32, 2, 4); +static I32X4: Type = Type::I(32, 4, 1); +static I32X4X2: Type = Type::I(32, 4, 2); +static I32X4X3: Type = Type::I(32, 4, 3); +static I32X4X4: Type = Type::I(32, 4, 4); +static I64X1: Type = Type::I(64, 1, 1); +static I64X1X2: Type = Type::I(64, 1, 2); +static I64X1X3: Type = Type::I(64, 1, 3); +static I64X1X4: Type = Type::I(64, 1, 4); +static I64X2: Type = Type::I(64, 2, 1); +static I64X2X2: Type = Type::I(64, 2, 2); +static I64X2X3: Type = Type::I(64, 2, 3); +static I64X2X4: Type = Type::I(64, 2, 4); +static I8X16: Type = Type::I(8, 16, 1); +static I8X16X2: Type = Type::I(8, 16, 2); +static I8X16X3: Type = Type::I(8, 16, 3); +static I8X16X4: Type = Type::I(8, 16, 4); +static I8X4: Type = Type::I(8, 4, 1); +static I8X8: Type = Type::I(8, 8, 1); +static I8X8X2: Type = Type::I(8, 8, 2); +static I8X8X3: Type = Type::I(8, 8, 3); +static I8X8X4: Type = Type::I(8, 8, 4); +static P128: Type = Type::PrimPoly(128); +static P16: Type = Type::PrimPoly(16); +static P16X4X2: Type = Type::P(16, 4, 2); +static P16X4X3: Type = Type::P(16, 4, 3); +static P16X4X4: Type = Type::P(16, 4, 4); +static P16X8X2: Type = Type::P(16, 8, 2); +static P16X8X3: Type = Type::P(16, 8, 3); +static P16X8X4: Type = Type::P(16, 8, 4); +static P64: Type = Type::PrimPoly(64); +static P64X1X2: Type = Type::P(64, 1, 2); +static P64X1X3: Type = Type::P(64, 1, 3); +static P64X1X4: Type = Type::P(64, 1, 4); +static P64X2X2: Type = Type::P(64, 2, 2); +static P64X2X3: Type = Type::P(64, 2, 3); +static P64X2X4: Type = Type::P(64, 2, 4); +static P8: Type = Type::PrimPoly(8); +static POLY16X4: Type = Type::P(16, 4, 1); +static POLY16X8: Type = Type::P(16, 8, 1); +static POLY64X1: Type = Type::P(64, 1, 1); +static POLY64X2: Type = Type::P(64, 2, 1); +static POLY8X16: Type = Type::P(8, 16, 1); +static POLY8X16X2: Type = Type::P(8, 16, 2); +static POLY8X16X3: Type = Type::P(8, 16, 3); +static POLY8X16X4: Type = Type::P(8, 16, 4); +static POLY8X8: Type = Type::P(8, 8, 1); +static POLY8X8X2: Type = 
Type::P(8, 8, 2); +static POLY8X8X3: Type = Type::P(8, 8, 3); +static POLY8X8X4: Type = Type::P(8, 8, 4); +static U16X4: Type = Type::U(16, 4, 1); +static U16X4X2: Type = Type::U(16, 4, 2); +static U16X4X3: Type = Type::U(16, 4, 3); +static U16X4X4: Type = Type::U(16, 4, 4); +static U16X8: Type = Type::U(16, 8, 1); +static U16X8X2: Type = Type::U(16, 8, 2); +static U16X8X3: Type = Type::U(16, 8, 3); +static U16X8X4: Type = Type::U(16, 8, 4); +static U32X2: Type = Type::U(32, 2, 1); +static U32X2X2: Type = Type::U(32, 2, 2); +static U32X2X3: Type = Type::U(32, 2, 3); +static U32X2X4: Type = Type::U(32, 2, 4); +static U32X4: Type = Type::U(32, 4, 1); +static U32X4X2: Type = Type::U(32, 4, 2); +static U32X4X3: Type = Type::U(32, 4, 3); +static U32X4X4: Type = Type::U(32, 4, 4); +static U64X1: Type = Type::U(64, 1, 1); +static U64X1X2: Type = Type::U(64, 1, 2); +static U64X1X3: Type = Type::U(64, 1, 3); +static U64X1X4: Type = Type::U(64, 1, 4); +static U64X2: Type = Type::U(64, 2, 1); +static U64X2X2: Type = Type::U(64, 2, 2); +static U64X2X3: Type = Type::U(64, 2, 3); +static U64X2X4: Type = Type::U(64, 2, 4); +static U8X16: Type = Type::U(8, 16, 1); +static U8X16X2: Type = Type::U(8, 16, 2); +static U8X16X3: Type = Type::U(8, 16, 3); +static U8X16X4: Type = Type::U(8, 16, 4); +static U8X8: Type = Type::U(8, 8, 1); +static U8X8X2: Type = Type::U(8, 8, 2); +static U8X8X3: Type = Type::U(8, 8, 3); +static U8X8X4: Type = Type::U(8, 8, 4); + +#[derive(Debug, Copy, Clone, PartialEq)] +enum Type { + PrimFloat(u8), + PrimSigned(u8), + PrimUnsigned(u8), + PrimPoly(u8), + MutPtr(&'static Type), + ConstPtr(&'static Type), + I(u8, u8, u8), + U(u8, u8, u8), + P(u8, u8, u8), + F(u8, u8, u8), + Never, +} + +stdarch_verify::arm_functions!(static FUNCTIONS); + +macro_rules! bail { + ($($t:tt)*) => (return Err(format!($($t)*))) +} + +#[test] +fn verify_all_signatures() { + // This is a giant HTML blob downloaded from + // https://developer.arm.com/technologies/neon/intrinsics which contains all + // NEON intrinsics at least. We do manual HTML parsing below. + let html = include_bytes!("../arm-intrinsics.html"); + let mut html = &html[..]; + let opts = ParseOpts { + tree_builder: TreeBuilderOpts { + drop_doctype: true, + ..Default::default() + }, + ..Default::default() + }; + let dom = parse_document(RcDom::default(), opts) + .from_utf8() + .read_from(&mut html) + .unwrap(); + + let accordion = find_accordion(&dom.document).unwrap(); + let map = parse_intrinsics(&accordion); + + let mut all_valid = true; + 'outer: for rust in FUNCTIONS { + // Skip some intrinsics that aren't NEON and are located in different + // places than the whitelists below. + match rust.name { + "brk" | "__breakpoint" | "udf" => continue, + _ => {} + } + let arm = match map.get(rust.name) { + Some(i) => i, + None => { + // Skip all these intrinsics as they're not listed in NEON + // descriptions online. + // + // TODO: we still need to verify these intrinsics or find a + // reference for them, need to figure out where though! 
+ if !rust.file.ends_with("dsp.rs\"") + && !rust.file.ends_with("cmsis.rs\"") + && !rust.file.ends_with("v6.rs\"") + && !rust.file.ends_with("v7.rs\"") + && !rust.file.ends_with("v8.rs\"") + { + println!( + "missing arm definition for {:?} in {}", + rust.name, rust.file + ); + all_valid = false; + } + continue; + } + }; + + if let Err(e) = matches(rust, arm) { + println!("failed to verify `{}`", rust.name); + println!(" * {}", e); + all_valid = false; + } + } + assert!(all_valid); +} + +fn matches(rust: &Function, arm: &Intrinsic) -> Result<(), String> { + if rust.ret != arm.ret.as_ref() { + bail!("mismatched return value") + } + if rust.arguments.len() != arm.arguments.len() { + bail!("mismatched argument lengths"); + } + + let mut nconst = 0; + let iter = rust.arguments.iter().zip(&arm.arguments).enumerate(); + for (i, (rust_ty, (arm, arm_const))) in iter { + if *rust_ty != arm { + bail!("mismatched arguments") + } + if *arm_const { + nconst += 1; + if !rust.required_const.contains(&i) { + bail!("argument const mismatch"); + } + } + } + if nconst != rust.required_const.len() { + bail!("wrong number of const arguments"); + } + + if rust.instrs.is_empty() { + bail!( + "instruction not listed for `{}`, but arm lists {:?}", + rust.name, + arm.instruction + ); + } else if false + /* not super reliable, but can be used to manually check */ + { + for instr in rust.instrs { + if arm.instruction.starts_with(instr) { + continue; + } + // sometimes arm says `foo` and disassemblers say `vfoo`, or + // sometimes disassemblers say `vfoo` and arm says `sfoo` or `ffoo` + if instr.starts_with("v") + && (arm.instruction.starts_with(&instr[1..]) + || arm.instruction[1..].starts_with(&instr[1..])) + { + continue; + } + bail!( + "arm failed to list `{}` as an instruction for `{}` in {:?}", + instr, + rust.name, + arm.instruction, + ); + } + } + + // TODO: verify `target_feature`. + + Ok(()) +} + +fn find_accordion(node: &Rc) -> Option> { + if let NodeData::Element { attrs, .. } = &node.data { + for attr in attrs.borrow().iter() { + if attr.name.local.eq_str_ignore_ascii_case("class") { + if attr.value.to_string() == "intrinsic-accordion" { + return Some(node.clone()); + } + } + } + } + + node.children + .borrow() + .iter() + .filter_map(|node| find_accordion(node)) + .next() +} + +#[derive(PartialEq)] +struct Intrinsic { + name: String, + ret: Option, + arguments: Vec<(Type, bool)>, + instruction: String, +} + +fn parse_intrinsics(node: &Rc) -> HashMap { + let mut ret = HashMap::new(); + for child in node.children.borrow().iter() { + if let NodeData::Element { .. } = child.data { + let f = parse_intrinsic(child); + ret.insert(f.name.clone(), f); + } + } + return ret; +} + +fn parse_intrinsic(node: &Rc) -> Intrinsic { + //
+ // ... + //